Example #1
class OptionsFingerprinterTest(BaseTest):

  def setUp(self):
    super(OptionsFingerprinterTest, self).setUp()
    self.options_fingerprinter = OptionsFingerprinter(self.context().build_graph)

  def test_fingerprint_dict(self):
    d1 = {'b': 1, 'a': 2}
    d2 = {'a': 2, 'b': 1}
    d3 = {'a': 1, 'b': 2}
    fp1, fp2, fp3 = (self.options_fingerprinter.fingerprint(dict_option, d)
                     for d in (d1, d2, d3))
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_list(self):
    l1 = [1, 2, 3]
    l2 = [1, 3, 2]
    fp1, fp2 = (self.options_fingerprinter.fingerprint(list_option, l)
                     for l in (l1, l2))
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_target_specs(self):
    specs = [':t1', ':t2', ':t3']
    payloads = [Payload() for i in range(3)]
    for i, (s, p) in enumerate(zip(specs, payloads)):
      p.add_field('foo', PrimitiveField(i))
      self.make_target(s, payload=p)
    s1, s2, s3 = specs

    fp_specs = lambda specs: self.options_fingerprinter.fingerprint(target_list_option, specs)
    fp1 = fp_specs([s1, s2])
    fp2 = fp_specs([s2, s1])
    fp3 = fp_specs([s1, s3])
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_file(self):
    fp1, fp2, fp3 = (self.options_fingerprinter.fingerprint(file_option,
                                                            self.create_file(f, contents=c))
                     for (f, c) in (('foo/bar.config', 'blah blah blah'),
                                    ('foo/bar.config', 'meow meow meow'),
                                    ('spam/egg.config', 'blah blah blah')))
    self.assertNotEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)
    self.assertNotEquals(fp2, fp3)

  def test_fingerprint_primitive(self):
    fp1, fp2 = (self.options_fingerprinter.fingerprint('', v) for v in ('foo', 5))
    self.assertNotEquals(fp1, fp2)
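Taken together, these assertions pin down the ordering semantics: dict options are fingerprinted order-insensitively, while list options are order-sensitive. A minimal standalone sketch of the dict case (an illustration only, not the Pants implementation) is:

import hashlib
import json

def fingerprint_dict(d):
  # Serializing with sort_keys=True canonicalizes key order before hashing,
  # so {'b': 1, 'a': 2} and {'a': 2, 'b': 1} produce the same digest.
  return hashlib.sha1(json.dumps(d, sort_keys=True).encode('utf-8')).hexdigest()

assert fingerprint_dict({'b': 1, 'a': 2}) == fingerprint_dict({'a': 2, 'b': 1})
assert fingerprint_dict({'a': 1, 'b': 2}) != fingerprint_dict({'a': 2, 'b': 1})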
Example #2
File: task.py Project: wonlay/pants
  def __init__(self, context, workdir):
    """Subclass __init__ methods, if defined, *must* follow this idiom:

    class MyTask(Task):
      def __init__(self, *args, **kwargs):
        super(MyTask, self).__init__(*args, **kwargs)
        ...

    This allows us to change Task.__init__()'s arguments without
    changing every subclass. If the subclass does not need its own
    initialization, this method can (and should) be omitted entirely.

    :API: public
    """
    super(TaskBase, self).__init__()
    self.context = context
    self._workdir = workdir

    self._cache_key_errors = set()

    self._build_invalidator_dir = os.path.join(
      self.context.options.for_global_scope().pants_workdir,
      'build_invalidator',
      self.stable_name())

    self._cache_factory = CacheSetup.create_cache_factory_for_task(self)

    self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)
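For reference, a subclass following the idiom prescribed by the docstring above would look like the sketch below (MyTask and its extra state are hypothetical):

class MyTask(Task):
  def __init__(self, *args, **kwargs):
    super(MyTask, self).__init__(*args, **kwargs)
    # Task-specific setup goes here, after TaskBase has populated self.context,
    # self._workdir, the cache factory and the options fingerprinter.
    self._seen_targets = set()  # hypothetical task-specific state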
Example #3
File: task.py Project: cheister/pants
  def __init__(self, context, workdir):
    """Subclass __init__ methods, if defined, *must* follow this idiom:

    class MyTask(Task):
      def __init__(self, *args, **kwargs):
        super(MyTask, self).__init__(*args, **kwargs)
        ...

    This allows us to change Task.__init__()'s arguments without
    changing every subclass. If the subclass does not need its own
    initialization, this method can (and should) be omitted entirely.
    """
    super(TaskBase, self).__init__()
    self.context = context
    self._workdir = workdir
    # TODO: It would be nice to use self.get_options().cache_key_gen_version here, because then
    # we could have a separate value for each scope if we really wanted to. However we can't
    # access per-task options in Task.__init__ because GroupTask.__init__ calls it with the
    # group task's scope, which isn't currently in the known scopes we generate options for.
    self._cache_key_generator = CacheKeyGenerator(
      self.context.options.for_global_scope().cache_key_gen_version)

    self._cache_key_errors = set()

    self._build_invalidator_dir = os.path.join(
      self.context.options.for_global_scope().pants_workdir,
      'build_invalidator',
      self.stable_name())

    self._cache_factory = CacheSetup.create_cache_factory_for_task(self)

    self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)
    self._fingerprint = None
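This older variant differs from Example #2 in two ways: it eagerly constructs a CacheKeyGenerator from the global cache_key_gen_version option (the TODO explains why the per-task option cannot be used here), and it initializes self._fingerprint to None so that the fingerprint property (see Example #7) can compute and cache its value lazily.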
Example #4
  def options_fingerprint(self):
    return OptionsFingerprinter.combined_options_fingerprint_for_scope(
      GLOBAL_SCOPE,
      self._bootstrap_options,
      fingerprint_key='daemon',
      invert=True
    )
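This snippet fingerprints the global-scope bootstrap options on behalf of the pants daemon. Judging by the arguments, fingerprint_key='daemon' together with invert=True controls which registered options participate in the digest, presumably so that pantsd can detect when an option relevant to it has changed.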
Example #5
  def _options_fingerprint(self, scope):
    options_hasher = sha1()
    options_hasher.update(scope.encode('utf-8'))
    options_fp = OptionsFingerprinter.combined_options_fingerprint_for_scope(
      scope,
      self.context.options,
      build_graph=self.context.build_graph,
      include_passthru=self.supports_passthru_args(),
    )
    options_hasher.update(options_fp.encode('utf-8'))
    return options_hasher.hexdigest() if PY3 else options_hasher.hexdigest().decode('utf-8')
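Seeding the sha1 with the scope name before folding in the combined options fingerprint qualifies the digest by scope, so two scopes with identical option values still hash differently; the PY3 conditional merely normalizes hexdigest() to a text string on both Python 2 and 3. A standalone sketch of the same pattern, with made-up values:

from hashlib import sha1

scope = 'compile.zinc'  # hypothetical task scope
options_fp = 'a' * 40   # stands in for combined_options_fingerprint_for_scope(...)

hasher = sha1()
hasher.update(scope.encode('utf-8'))  # the scope itself participates in the digest
hasher.update(options_fp.encode('utf-8'))
print(hasher.hexdigest())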
Example #6
File: task.py Project: wonlay/pants
class TaskBase(SubsystemClientMixin, Optionable, AbstractClass):
  """Defines a lifecycle that prepares a task for execution and provides the base machinery
  needed to execute it.

  Provides the base lifecycle methods that allow a task to interact with the command line, other
  tasks and the user.  The lifecycle is linear and run via the following sequence:
  1. register_options - declare options configurable via cmd-line flag or config file.
  2. product_types - declare the product types your task is capable of producing.
  3. alternate_target_roots - propose a different set of target roots to use than those specified
                              via the CLI for the active pants run.
  4. prepare - request any products needed from other tasks.
  5. __init__ - distill configuration into the information needed to execute.

  Provides access to the current run context for scoping work.

  Also provides the basic facilities for doing work efficiently including providing a work directory
  for scratch space on disk, an invalidator for checking which targets need work done on, and an
  artifact cache for re-using previously cached work.

  #TODO(John Sirois):  Lifecycle is currently split between TaskBase and Task and lifecycle
  (interface) and helpers (utility) are currently conflated.  Tease these apart and narrow the scope
  of the helpers.  Ideally console tasks don't inherit a workdir, invalidator or build cache for
  example.
  """
  options_scope_category = ScopeInfo.TASK

  # We set this explicitly on the synthetic subclass, so that it shares a stable name with
  # its superclass, which is not necessary for regular use, but can be convenient in tests.
  _stable_name = None

  @classmethod
  def implementation_version(cls):
    """
    :API: public
    """
    return [('TaskBase', 2)]

  @classmethod
  @memoized_method
  def implementation_version_str(cls):
    return '.'.join(['_'.join(map(str, x)) for x in cls.implementation_version()])

  @classmethod
  def stable_name(cls):
    """The stable name of this task type.

    We synthesize subclasses of the task types at runtime, and these synthesized subclasses
    may have random names (e.g., in tests), so this gives us a stable name to use across runs,
    e.g., in artifact cache references.
    """
    return cls._stable_name or cls._compute_stable_name()

  @classmethod
  def _compute_stable_name(cls):
    return '{}_{}'.format(cls.__module__, cls.__name__).replace('.', '_')

  @classmethod
  def subsystem_dependencies(cls):
    return super(TaskBase, cls).subsystem_dependencies() + (CacheSetup.scoped(cls),)

  @classmethod
  def product_types(cls):
    """The list of products this Task produces. Set the product type(s) for this
    task, i.e. the product type(s) this task creates, e.g. ['classes'].

    By default, each task is considered as creating unique product types.
    Subclasses that create products should override this to specify their unique product type(s).

    :API: public
    """
    return []

  @classmethod
  def known_scope_infos(cls):
    """Yields ScopeInfo for all known scopes for this task, in no particular order."""
    # The task's own scope.
    yield cls.get_scope_info()
    # The scopes of any task-specific subsystems it uses.
    for dep in cls.subsystem_dependencies_iter():
      if not dep.is_global():
        yield dep.subsystem_cls.get_scope_info(subscope=dep.scope)

  @classmethod
  def supports_passthru_args(cls):
    """Subclasses may override to indicate that they can use passthru args.

    :API: public
    """
    return False

  @classmethod
  def _scoped_options(cls, options):
    return options[cls.options_scope]

  @classmethod
  def get_alternate_target_roots(cls, options, address_mapper, build_graph):
    # Subclasses should not generally need to override this method.
    return cls.alternate_target_roots(cls._scoped_options(options), address_mapper, build_graph)

  @classmethod
  def alternate_target_roots(cls, options, address_mapper, build_graph):
    """Allows a Task to propose alternate target roots from those specified on the CLI.

    At most 1 unique proposal is allowed amongst all tasks involved in the run.  If more than 1
    unique list of target roots is proposed an error is raised during task scheduling.

    :API: public

    :returns list: The new target roots to use or None to accept the CLI specified target roots.
    """

  @classmethod
  def invoke_prepare(cls, options, round_manager):
    # Subclasses should not generally need to override this method.
    return cls.prepare(cls._scoped_options(options), round_manager)

  @classmethod
  def prepare(cls, options, round_manager):
    """Prepares a task for execution.

    Called before execution and prior to any tasks that may be (indirectly) depended upon.

    Typically a task that requires products from other goals would register interest in those
    products here and then retrieve the requested product mappings when executed.

    :API: public
    """

  def __init__(self, context, workdir):
    """Subclass __init__ methods, if defined, *must* follow this idiom:

    class MyTask(Task):
      def __init__(self, *args, **kwargs):
        super(MyTask, self).__init__(*args, **kwargs)
        ...

    This allows us to change Task.__init__()'s arguments without
    changing every subclass. If the subclass does not need its own
    initialization, this method can (and should) be omitted entirely.

    :API: public
    """
    super(TaskBase, self).__init__()
    self.context = context
    self._workdir = workdir

    self._cache_key_errors = set()

    self._build_invalidator_dir = os.path.join(
      self.context.options.for_global_scope().pants_workdir,
      'build_invalidator',
      self.stable_name())

    self._cache_factory = CacheSetup.create_cache_factory_for_task(self)

    self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)

  def get_options(self):
    """Returns the option values for this task's scope.

    :API: public
    """
    return self.context.options.for_scope(self.options_scope)

  def get_passthru_args(self):
    """
    :API: public
    """
    if not self.supports_passthru_args():
      raise TaskError('{0} does not support passthru args.'.format(self.stable_name()))
    else:
      return self.context.options.passthru_args_for_scope(self.options_scope)

  @property
  def workdir(self):
    """A scratch-space for this task that will be deleted by `clean-all`.

    It's not guaranteed that the workdir exists, just that no other task has been given this
    workdir path to use.

    :API: public
    """
    return self._workdir

  def _options_fingerprint(self, scope):
    pairs = self.context.options.get_fingerprintable_for_scope(scope)
    hasher = sha1()
    for (option_type, option_val) in pairs:
      fp = self._options_fingerprinter.fingerprint(option_type, option_val)
      if fp is not None:
        hasher.update(fp)
    return hasher.hexdigest()

  @memoized_property
  def fingerprint(self):
    """Returns a fingerprint for the identity of the task.

    A task fingerprint is composed of the options the task is currently running under.
    Useful for invalidating unchanging targets being executed beneath changing task
    options that affect outputted artifacts.

    A task's fingerprint is only valid after the task has been fully initialized.
    """
    hasher = sha1()
    hasher.update(self._options_fingerprint(self.options_scope))
    hasher.update(self.implementation_version_str())
    # TODO: this is not recursive, but should be: see #2739
    for dep in self.subsystem_dependencies_iter():
      hasher.update(self._options_fingerprint(dep.options_scope()))
    return str(hasher.hexdigest())

  def artifact_cache_reads_enabled(self):
    return self._cache_factory.read_cache_available()

  def artifact_cache_writes_enabled(self):
    return self._cache_factory.write_cache_available()

  def invalidate(self):
    """Invalidates all targets for this task."""
    BuildInvalidator(self._build_invalidator_dir).force_invalidate_all()

  @property
  def create_target_dirs(self):
    """Whether to create a results_dir per VersionedTarget in the workdir of the Task.

    This defaults to the value of `self.cache_target_dirs` (as caching them requires
    creating them), but may be overridden independently to create the dirs without caching
    them.

    :API: public
    """
    return self.cache_target_dirs or False

  @property
  def cache_target_dirs(self):
    """Whether to cache files in VersionedTarget's results_dir after exiting an invalidated block.

    Subclasses may override this method to return True if they wish to use this style
    of "automated" caching, where each VersionedTarget is given an associated results directory,
    which will automatically be uploaded to the cache. Tasks should place the output files
    for each VersionedTarget in said results directory. It is highly suggested to follow this
    schema for caching, rather than manually making updates to the artifact cache.

    :API: public
    """
    return False

  @property
  def incremental(self):
    """Whether this Task implements incremental building of individual targets.

    Incremental tasks with `cache_target_dirs` set will have the results_dir of the previous build
    for a target cloned into the results_dir for the current build (where possible). This
    copy-on-write behaviour allows for immutability of the results_dir once a target has been
    marked valid.

    :API: public
    """
    return False

  @property
  def cache_incremental(self):
    """For incremental tasks, indicates whether the results of incremental builds should be cached.

    Deterministic per-target incremental compilation is a relatively difficult thing to implement,
    so this property provides an escape hatch to avoid caching things in that riskier case.

    :API: public
    """
    return False

  @contextmanager
  def invalidated(self,
                  targets,
                  invalidate_dependents=False,
                  silent=False,
                  fingerprint_strategy=None,
                  topological_order=False):
    """Checks targets for invalidation, first checking the artifact cache.

    Subclasses call this to figure out what to work on.

    :API: public

    :param targets: The targets to check for changes.
    :param invalidate_dependents: If True then any targets depending on changed targets are
                                  invalidated.
    :param silent: If true, suppress logging information about target invalidation.
    :param fingerprint_strategy: A FingerprintStrategy instance, which can do per task,
                                finer grained fingerprinting of a given Target.
    :param topological_order: Whether to invalidate in dependency order.

    If no exceptions are thrown by work in the block, the build cache is updated for the targets.
    Note: the artifact cache is not updated. That must be done manually.

    :returns: Yields an InvalidationCheck object reflecting the targets.
    :rtype: InvalidationCheck
    """

    cache_key_generator = CacheKeyGenerator(
      self.context.options.for_global_scope().cache_key_gen_version,
      self.fingerprint)
    cache_manager = InvalidationCacheManager(self.workdir,
                                             cache_key_generator,
                                             self._build_invalidator_dir,
                                             invalidate_dependents,
                                             fingerprint_strategy=fingerprint_strategy,
                                             invalidation_report=self.context.invalidation_report,
                                             task_name=type(self).__name__,
                                             task_version=self.implementation_version_str(),
                                             artifact_write_callback=self.maybe_write_artifact)

    invalidation_check = cache_manager.check(targets, topological_order=topological_order)

    self._maybe_create_results_dirs(invalidation_check.all_vts)

    if invalidation_check.invalid_vts and self.artifact_cache_reads_enabled():
      with self.context.new_workunit('cache'):
        cached_vts, uncached_vts, uncached_causes = \
          self.check_artifact_cache(self.check_artifact_cache_for(invalidation_check))
      if cached_vts:
        cached_targets = [vt.target for vt in cached_vts]
        self.context.run_tracker.artifact_cache_stats.add_hits(cache_manager.task_name,
                                                               cached_targets)
        if not silent:
          self._report_targets('Using cached artifacts for ', cached_targets, '.')
      if uncached_vts:
        uncached_targets = [vt.target for vt in uncached_vts]
        self.context.run_tracker.artifact_cache_stats.add_misses(cache_manager.task_name,
                                                                 uncached_targets,
                                                                 uncached_causes)
        if not silent:
          self._report_targets('No cached artifacts for ', uncached_targets, '.')
      # Now that we've checked the cache, re-partition whatever is still invalid.
      invalidation_check = \
        InvalidationCheck(invalidation_check.all_vts, uncached_vts)

    if not silent:
      targets = []
      for vt in invalidation_check.invalid_vts:
        targets.extend(vt.targets)

      if len(targets):
        msg_elements = ['Invalidated ',
                        items_to_report_element([t.address.reference() for t in targets], 'target'),
                        '.']
        self.context.log.info(*msg_elements)

    invalidation_report = self.context.invalidation_report
    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='pre-check')

    # Cache has been checked to create the full list of invalid VTs.
    # Only copy previous_results for this subset of VTs.
    for vts in invalidation_check.invalid_vts:
      if self.incremental:
        vts.copy_previous_results(self.workdir)

    # Yield the result, and then mark the targets as up to date.
    yield invalidation_check

    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='post-check')

    for vt in invalidation_check.invalid_vts:
      vt.update()

    # Background work to clean up previous builds.
    if self.context.options.for_global_scope().workdir_max_build_entries is not None:
      self._launch_background_workdir_cleanup(invalidation_check.all_vts)

  def maybe_write_artifact(self, vt):
    if self._should_cache_target_dir(vt):
      self.update_artifact_cache([(vt, [vt.current_results_dir])])

  def _launch_background_workdir_cleanup(self, vts):
    workdir_build_cleanup_job = Work(self._cleanup_workdir_stale_builds, [(vts,)], 'workdir_build_cleanup')
    self.context.submit_background_work_chain([workdir_build_cleanup_job])

  def _cleanup_workdir_stale_builds(self, vts):
    # workdir_max_build_entries is guaranteed to be non-None before this method is invoked.
    max_entries_per_target = max(2, self.context.options.for_global_scope().workdir_max_build_entries)
    for vt in vts:
      live_dirs = list(vt.live_dirs())
      if not live_dirs:
        continue
      root_dir = os.path.dirname(vt.results_dir)
      safe_rm_oldest_items_in_dir(root_dir, max_entries_per_target, excludes=live_dirs)

  def _should_cache_target_dir(self, vt):
    """Return true if the given vt should be written to a cache (if configured)."""
    return (
      self.cache_target_dirs and
      not vt.target.has_label('no_cache') and
      (not vt.is_incremental or self.cache_incremental) and
      self.artifact_cache_writes_enabled()
    )

  def _maybe_create_results_dirs(self, vts):
    """If `cache_target_dirs`, create results_dirs for the given versioned targets."""
    if self.create_target_dirs:
      for vt in vts:
        vt.create_results_dir()

  def check_artifact_cache_for(self, invalidation_check):
    """Decides which VTS to check the artifact cache for.

    By default we check for each invalid target. Can be overridden, e.g., to
    instead check only for a single artifact for the entire target set.
    """
    return invalidation_check.invalid_vts

  def check_artifact_cache(self, vts):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a tuple (cached, uncached, uncached_causes) of VersionedTargets that were
    satisfied/unsatisfied from the cache. Uncached VTS are also attached with their
    causes for the miss: `False` indicates a legit miss while `UnreadableArtifact`
    is due to either local or remote cache failures.
    """
    return self.do_check_artifact_cache(vts)

  def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a pair (cached, uncached) of VersionedTargets that were
    satisfied/unsatisfied from the cache.
    """
    if not vts:
      return [], [], []

    read_cache = self._cache_factory.get_read_cache()
    items = [(read_cache, vt.cache_key, vt.current_results_dir if self.cache_target_dirs else None)
             for vt in vts]
    res = self.context.subproc_map(call_use_cached_files, items)

    cached_vts = []
    uncached_vts = []
    uncached_causes = []

    # Note that while the input vts may represent multiple targets (for tasks that override
    # check_artifact_cache_for), the ones we return must represent single targets.
    # Once flattened, cached/uncached vts are in separate lists. Each uncached vts is paired
    # with the cause of its miss, for stats reporting purposes.
    for vt, was_in_cache in zip(vts, res):
      if was_in_cache:
        cached_vts.extend(vt.versioned_targets)
      else:
        uncached_vts.extend(vt.versioned_targets)
        uncached_causes.extend(repeat(was_in_cache, len(vt.versioned_targets)))
        if isinstance(was_in_cache, UnreadableArtifact):
          self._cache_key_errors.update(was_in_cache.key)

    if post_process_cached_vts:
      post_process_cached_vts(cached_vts)
    for vt in cached_vts:
      vt.update()
    return cached_vts, uncached_vts, uncached_causes

  def update_artifact_cache(self, vts_artifactfiles_pairs):
    """Write to the artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of absolute paths to artifacts for the VersionedTargetSet.
    """
    update_artifact_cache_work = self._get_update_artifact_cache_work(vts_artifactfiles_pairs)
    if update_artifact_cache_work:
      self.context.submit_background_work_chain([update_artifact_cache_work],
                                                parent_workunit_name='cache')

  def _get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
    """Create a Work instance to update an artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of paths to artifacts for the VersionedTargetSet.
    """
    cache = self._cache_factory.get_write_cache()
    if cache:
      if len(vts_artifactfiles_pairs) == 0:
        return None
      # Do some reporting.
      targets = set()
      for vts, _ in vts_artifactfiles_pairs:
        targets.update(vts.targets)

      self._report_targets(
        'Caching artifacts for ',
        list(targets),
        '.',
        logger=self.context.log.debug,
      )

      always_overwrite = self._cache_factory.overwrite()

      # Cache the artifacts.
      args_tuples = []
      for vts, artifactfiles in vts_artifactfiles_pairs:
        overwrite = always_overwrite or vts.cache_key in self._cache_key_errors
        args_tuples.append((cache, vts.cache_key, artifactfiles, overwrite))

      return Work(lambda x: self.context.subproc_map(call_insert, x), [(args_tuples,)], 'insert')
    else:
      return None

  def _report_targets(self, prefix, targets, suffix, logger=None):
    logger = logger or self.context.log.info
    logger(
      prefix,
      items_to_report_element([t.address.reference() for t in targets], 'target'),
      suffix,
    )

  def require_single_root_target(self):
    """If a single target was specified on the cmd line, returns that target.

    Otherwise throws TaskError.

    :API: public
    """
    target_roots = self.context.target_roots
    if len(target_roots) == 0:
      raise TaskError('No target specified.')
    elif len(target_roots) > 1:
      raise TaskError('Multiple targets specified: {}'
                      .format(', '.join([repr(t) for t in target_roots])))
    return target_roots[0]

  def determine_target_roots(self, goal_name, predicate=None):
    """Helper for tasks that scan for default target roots.

    :param string goal_name: The goal name to use for any warning emissions.
    :param callable predicate: The predicate to pass to `context.scan().targets(predicate=X)`.
    """
    deprecated_conditional(
        lambda: not self.context.target_roots,
        '1.5.0.dev0',
        '`./pants {0}` (with no explicit targets) will soon become an error. Please specify '
        'one or more explicit target specs (e.g. `./pants {0} ::`).'.format(goal_name))
    if not self.context.target_roots and not self.get_options().enable_v2_engine:
      # For the v1 path, continue the behavior of e.g. `./pants list` implies `./pants list ::`.
      return self.context.scan().targets(predicate=predicate)

    # For the v2 path, e.g. `./pants list` is a functional no-op. This matches the v2 mode behavior
    # of e.g. `./pants --changed-parent=HEAD list` (w/ no changes) returning an empty result.
    return self.context.target_roots
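To see how the machinery above fits together, a hypothetical subclass would typically drive invalidated() from its execute() method and do work only for the invalid targets:

class MyTask(Task):  # hypothetical
  def execute(self):
    targets = self.context.targets()
    with self.invalidated(targets, invalidate_dependents=True) as invalidation_check:
      for vt in invalidation_check.invalid_vts:
        # Rebuild only what changed; when the block exits without raising,
        # these targets are marked valid in the build invalidator.
        self._build(vt.target, vt.results_dir)  # hypothetical per-target work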
Example #7
File: task.py Project: TansyArron/pants
class TaskBase(Optionable, AbstractClass):
  """Defines a lifecycle that prepares a task for execution and provides the base machinery
  needed to execute it.

  Provides the base lifecycle methods that allow a task to interact with the command line, other
  tasks and the user.  The lifecycle is linear and run via the following sequence:
  1. register_options - declare options configurable via cmd-line flag or config file.
  2. product_types - declare the product types your task is capable of producing.
  3. alternate_target_roots - propose a different set of target roots to use than those specified
                              via the CLI for the active pants run.
  4. prepare - request any products needed from other tasks.
  5. __init__ - distill configuration into the information needed to execute.

  Provides access to the current run context for scoping work.

  Also provides the basic facilities for doing work efficiently including providing a work directory
  for scratch space on disk, an invalidator for checking which targets need work done on, and an
  artifact cache for re-using previously cached work.

  #TODO(John Sirois):  Lifecycle is currently split between TaskBase and Task and lifecycle
  (interface) and helpers (utility) are currently conflated.  Tease these apart and narrow the scope
  of the helpers.  Ideally console tasks don't inherit a workdir, invalidator or build cache for
  example.
  """
  options_scope_category = ScopeInfo.TASK

  # Tests may override this to provide a stable name despite the class name being a unique,
  # synthetic name.
  _stable_name = None

  @classmethod
  def stable_name(cls):
    """The stable name of this task type.

    We synthesize subclasses of the task types at runtime, and these synthesized subclasses
    may have random names (e.g., in tests), so this gives us a stable name to use across runs,
    e.g., in artifact cache references.
    """
    return cls._stable_name or cls._compute_stable_name()

  @classmethod
  def _compute_stable_name(cls):
    return '{}_{}'.format(cls.__module__, cls.__name__).replace('.', '_')

  @classmethod
  def global_subsystems(cls):
    """The global subsystems this task uses.

    A tuple of subsystem types.
    """
    return tuple()

  @classmethod
  def task_subsystems(cls):
    """The private, per-task subsystems this task uses.

    A tuple of subsystem types.
    """
    return (CacheSetup,)

  @classmethod
  def product_types(cls):
    """The list of products this Task produces. Set the product type(s) for this
    task, i.e. the product type(s) this task creates, e.g. ['classes'].

    By default, each task is considered as creating unique product types.
    Subclasses that create products should override this to specify their unique product type(s).
    """
    return []

  @classmethod
  def known_scope_infos(cls):
    """Yields ScopeInfo for all known scopes for this task, in no particular order."""
    # The task's own scope.
    yield cls.get_scope_info()
    # The scopes of any task-specific subsystems it uses.
    for subsystem in cls.task_subsystems():
      yield subsystem.get_scope_info(subscope=cls.options_scope)

  @classmethod
  def supports_passthru_args(cls):
    """Subclasses may override to indicate that they can use passthru args."""
    return False

  @classmethod
  def _scoped_options(cls, options):
    return options[cls.options_scope]

  @classmethod
  def _alternate_target_roots(cls, options, address_mapper, build_graph):
    # Subclasses should not generally need to override this method.
    # TODO(John Sirois): Kill when killing GroupTask as part of RoundEngine parallelization.
    return cls.alternate_target_roots(cls._scoped_options(options), address_mapper, build_graph)

  @classmethod
  def alternate_target_roots(cls, options, address_mapper, build_graph):
    """Allows a Task to propose alternate target roots from those specified on the CLI.

    At most 1 unique proposal is allowed amongst all tasks involved in the run.  If more than 1
    unique list of target roots is proposed an error is raised during task scheduling.

    :returns list: The new target roots to use or None to accept the CLI specified target roots.
    """

  @classmethod
  def _prepare(cls, options, round_manager):
    # Subclasses should not generally need to override this method.
    # TODO(John Sirois): Kill when killing GroupTask as part of RoundEngine parallelization.
    return cls.prepare(cls._scoped_options(options), round_manager)

  @classmethod
  def prepare(cls, options, round_manager):
    """Prepares a task for execution.

    Called before execution and prior to any tasks that may be (indirectly) depended upon.

    Typically a task that requires products from other goals would register interest in those
    products here and then retrieve the requested product mappings when executed.
    """

  def __init__(self, context, workdir):
    """Subclass __init__ methods, if defined, *must* follow this idiom:

    class MyTask(Task):
      def __init__(self, *args, **kwargs):
        super(MyTask, self).__init__(*args, **kwargs)
        ...

    This allows us to change Task.__init__()'s arguments without
    changing every subclass. If the subclass does not need its own
    initialization, this method can (and should) be omitted entirely.
    """
    super(TaskBase, self).__init__()
    self.context = context
    self._workdir = workdir
    # TODO: It would be nice to use self.get_options().cache_key_gen_version here, because then
    # we could have a separate value for each scope if we really wanted to. However we can't
    # access per-task options in Task.__init__ because GroupTask.__init__ calls it with the
    # group task's scope, which isn't currently in the known scopes we generate options for.
    self._cache_key_generator = CacheKeyGenerator(
      self.context.options.for_global_scope().cache_key_gen_version)

    self._cache_key_errors = set()

    self._build_invalidator_dir = os.path.join(
      self.context.options.for_global_scope().pants_workdir,
      'build_invalidator',
      self.stable_name())

    self._cache_factory = CacheSetup.create_cache_factory_for_task(self)

    self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)
    self._fingerprint = None

  def get_options(self):
    """Returns the option values for this task's scope."""
    return self.context.options.for_scope(self.options_scope)

  def get_passthru_args(self):
    if not self.supports_passthru_args():
      raise TaskError('{0} does not support passthru args.'.format(self.stable_name()))
    else:
      return self.context.options.passthru_args_for_scope(self.options_scope)

  @property
  def workdir(self):
    """A scratch-space for this task that will be deleted by `clean-all`.

    It's not guaranteed that the workdir exists, just that no other task has been given this
    workdir path to use.
    """
    return self._workdir

  def _options_fingerprint(self, scope):
    pairs = self.context.options.get_fingerprintable_for_scope(scope)
    hasher = sha1()
    for (option_type, option_val) in pairs:
      fp = self._options_fingerprinter.fingerprint(option_type, option_val)
      if fp is not None:
        hasher.update(fp)
    return hasher.hexdigest()

  @property
  def fingerprint(self):
    """Returns a fingerprint for the identity of the task.

    A task fingerprint is composed of the options the task is currently running under.
    Useful for invalidating unchanging targets being executed beneath changing task
    options that affect outputted artifacts.

    A task's fingerprint is only valid after the task has been fully initialized.
    """
    if not self._fingerprint:
      hasher = sha1()
      hasher.update(self._options_fingerprint(self.options_scope))
      for subsystem in self.task_subsystems():
        hasher.update(self._options_fingerprint(subsystem.subscope(self.options_scope)))
        hasher.update(self._options_fingerprint(subsystem.options_scope))
      self._fingerprint = str(hasher.hexdigest())
    return self._fingerprint

  def artifact_cache_reads_enabled(self):
    return self._cache_factory.read_cache_available()

  def artifact_cache_writes_enabled(self):
    return self._cache_factory.write_cache_available()

  def invalidate_for_files(self):
    """Provides extra files that participate in invalidation.

    Subclasses can override and return a list of full paths to extra, non-source files that should
    be checked for changes when managing target invalidation. This is useful for tracking
    changes to pre-built build tools, e.g., the thrift compiler.
    """
    return []

  def invalidate(self):
    """Invalidates all targets for this task."""
    BuildInvalidator(self._build_invalidator_dir).force_invalidate_all()

  def create_cache_manager(self, invalidate_dependents, fingerprint_strategy=None):
    """Creates a cache manager that can be used to invalidate targets on behalf of this task.

    Use this if you need to check for invalid targets but can't use the contextmanager created by
    invalidated(), e.g., because you don't want to mark the targets as valid when done.

    invalidate_dependents:   If True then any targets depending on changed targets are invalidated.
    fingerprint_strategy:    A FingerprintStrategy instance, which can do per task, finer grained
                             fingerprinting of a given Target.
    """

    return InvalidationCacheManager(self._cache_key_generator,
                                    self._build_invalidator_dir,
                                    invalidate_dependents,
                                    fingerprint_strategy=fingerprint_strategy,
                                    invalidation_report=self.context.invalidation_report,
                                    task_name=type(self).__name__)

  @property
  def cache_target_dirs(self):
    """Whether to cache files in VersionedTarget's results_dir after exiting an invalidated block.

    Subclasses may override this method to return True if they wish to use this style
    of "automated" caching, where each VersionedTarget is given an associated results directory,
    which will automatically be uploaded to the cache. Tasks should place the output files
    for each VersionedTarget in said results directory. It is highly suggested to follow this
    schema for caching, rather than manually making updates to the artifact cache.
    """
    return False

  @contextmanager
  def invalidated(self,
                  targets,
                  invalidate_dependents=False,
                  partition_size_hint=sys.maxint,
                  silent=False,
                  locally_changed_targets=None,
                  fingerprint_strategy=None,
                  topological_order=False):
    """Checks targets for invalidation, first checking the artifact cache.

    Subclasses call this to figure out what to work on.

    :param targets:               The targets to check for changes.
    :param invalidate_dependents: If True then any targets depending on changed targets are invalidated.
    :param partition_size_hint:   Each VersionedTargetSet in the yielded list will represent targets
                                  containing roughly this number of source files, if possible. Set to
                                  sys.maxint for a single VersionedTargetSet. Set to 0 for one
                                  VersionedTargetSet per target. It is up to the caller to do the right
                                  thing with whatever partitioning it asks for.
    :param locally_changed_targets: Targets that we've edited locally. If specified, and there aren't too
                                  many of them, we keep these in separate partitions from other targets,
                                  as these are more likely to have build errors, and so to be rebuilt over
                                  and over, and partitioning them separately is a performance win.
    :param fingerprint_strategy:   A FingerprintStrategy instance, which can do per task, finer grained
                                  fingerprinting of a given Target.

    If no exceptions are thrown by work in the block, the build cache is updated for the targets.
    Note: the artifact cache is not updated. That must be done manually.

    :returns: Yields an InvalidationCheck object reflecting the (partitioned) targets.
    :rtype: InvalidationCheck
    """

    # TODO(benjy): Compute locally_changed_targets here instead of passing it in? We currently pass
    # it in because JvmCompile already has the source->target mapping for other reasons, and also
    # to selectively enable this feature.
    fingerprint_strategy = fingerprint_strategy or TaskIdentityFingerprintStrategy(self)
    cache_manager = self.create_cache_manager(invalidate_dependents,
                                              fingerprint_strategy=fingerprint_strategy)
    # We separate locally-modified targets from others by coloring them differently.
    # This can be a performance win, because these targets are more likely to be iterated
    # over, and this preserves "chunk stability" for them.
    colors = {}

    # But we only do so if there aren't too many, or this optimization will backfire.
    locally_changed_target_limit = 10

    if locally_changed_targets and len(locally_changed_targets) < locally_changed_target_limit:
      for t in targets:
        if t in locally_changed_targets:
          colors[t] = 'locally_changed'
        else:
          colors[t] = 'not_locally_changed'
    invalidation_check = cache_manager.check(targets, partition_size_hint, colors, topological_order=topological_order)

    if invalidation_check.invalid_vts and self.artifact_cache_reads_enabled():
      with self.context.new_workunit('cache'):
        cached_vts, uncached_vts = \
          self.check_artifact_cache(self.check_artifact_cache_for(invalidation_check))
      if cached_vts:
        cached_targets = [vt.target for vt in cached_vts]
        for t in cached_targets:
          self.context.run_tracker.artifact_cache_stats.add_hit('default', t)
        if not silent:
          self._report_targets('Using cached artifacts for ', cached_targets, '.')
      if uncached_vts:
        uncached_targets = [vt.target for vt in uncached_vts]
        for t in uncached_targets:
          self.context.run_tracker.artifact_cache_stats.add_miss('default', t)
        if not silent:
          self._report_targets('No cached artifacts for ', uncached_targets, '.')
      # Now that we've checked the cache, re-partition whatever is still invalid.
      invalidation_check = \
        InvalidationCheck(invalidation_check.all_vts, uncached_vts, partition_size_hint, colors)

    if self.cache_target_dirs:
      for vt in invalidation_check.all_vts:
        vt.create_results_dir(os.path.join(self.workdir, vt.cache_key.hash))

    if not silent:
      targets = []
      num_invalid_partitions = len(invalidation_check.invalid_vts_partitioned)
      for vt in invalidation_check.invalid_vts_partitioned:
        targets.extend(vt.targets)

      if len(targets):
        msg_elements = ['Invalidated ',
                        items_to_report_element([t.address.reference() for t in targets], 'target')]
        if num_invalid_partitions > 1:
          msg_elements.append(' in {} target partitions'.format(num_invalid_partitions))
        msg_elements.append('.')
        self.context.log.info(*msg_elements)

    invalidation_report = self.context.invalidation_report
    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='pre-check')

    # Yield the result, and then mark the targets as up to date.
    yield invalidation_check

    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='post-check')
    for vt in invalidation_check.invalid_vts:
      vt.update()  # In case the caller doesn't update.

    write_to_cache = (self.cache_target_dirs
                      and self.artifact_cache_writes_enabled()
                      and invalidation_check.invalid_vts)
    if write_to_cache:
      def result_files(vt):
        return [os.path.join(vt.results_dir, f) for f in os.listdir(vt.results_dir)]
      pairs = [(vt, result_files(vt)) for vt in invalidation_check.invalid_vts]
      self.update_artifact_cache(pairs)

  def check_artifact_cache_for(self, invalidation_check):
    """Decides which VTS to check the artifact cache for.

    By default we check for each invalid target. Can be overridden, e.g., to
    instead check only for a single artifact for the entire target set.
    """
    return invalidation_check.invalid_vts

  def check_artifact_cache(self, vts):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a pair (cached, uncached) of VersionedTargets that were
    satisfied/unsatisfied from the cache.
    """
    return self.do_check_artifact_cache(vts)

  def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a pair (cached, uncached) of VersionedTargets that were
    satisfied/unsatisfied from the cache.
    """
    if not vts:
      return [], []

    cached_vts = []
    uncached_vts = OrderedSet(vts)

    read_cache = self._cache_factory.get_read_cache()
    items = [(read_cache, vt.cache_key) for vt in vts]

    res = self.context.subproc_map(call_use_cached_files, items)

    for vt, was_in_cache in zip(vts, res):
      if was_in_cache:
        cached_vts.append(vt)
        uncached_vts.discard(vt)
      elif isinstance(was_in_cache, UnreadableArtifact):
        self._cache_key_errors.update(was_in_cache.key)

    # Note that while the input vts may represent multiple targets (for tasks that override
    # check_artifact_cache_for), the ones we return must represent single targets.
    def flatten(vts):
      return list(itertools.chain.from_iterable([vt.versioned_targets for vt in vts]))
    all_cached_vts, all_uncached_vts = flatten(cached_vts), flatten(uncached_vts)
    if post_process_cached_vts:
      post_process_cached_vts(all_cached_vts)
    for vt in all_cached_vts:
      vt.update()
    return all_cached_vts, all_uncached_vts

  def update_artifact_cache(self, vts_artifactfiles_pairs):
    """Write to the artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of absolute paths to artifacts for the VersionedTargetSet.
    """
    update_artifact_cache_work = self.get_update_artifact_cache_work(vts_artifactfiles_pairs)
    if update_artifact_cache_work:
      self.context.submit_background_work_chain([update_artifact_cache_work],
                                                parent_workunit_name='cache')

  def get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
    """Create a Work instance to update an artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of paths to artifacts for the VersionedTargetSet.
    """
    cache = self._cache_factory.get_write_cache()
    if cache:
      if len(vts_artifactfiles_pairs) == 0:
        return None
      # Do some reporting.
      targets = set()
      for vts, _ in vts_artifactfiles_pairs:
        targets.update(vts.targets)
      self._report_targets('Caching artifacts for ', list(targets), '.')

      always_overwrite = self._cache_factory.overwrite()

      # Cache the artifacts.
      args_tuples = []
      for vts, artifactfiles in vts_artifactfiles_pairs:
        overwrite = always_overwrite or vts.cache_key in self._cache_key_errors
        args_tuples.append((cache, vts.cache_key, artifactfiles, overwrite))

      return Work(lambda x: self.context.subproc_map(call_insert, x), [(args_tuples,)], 'insert')
    else:
      return None

  def _report_targets(self, prefix, targets, suffix):
    self.context.log.info(
      prefix,
      items_to_report_element([t.address.reference() for t in targets], 'target'),
      suffix)

  def require_single_root_target(self):
    """If a single target was specified on the cmd line, returns that target.

    Otherwise throws TaskError.
    """
    target_roots = self.context.target_roots
    if len(target_roots) == 0:
      raise TaskError('No target specified.')
    elif len(target_roots) > 1:
      raise TaskError('Multiple targets specified: {}'
                      .format(', '.join([repr(t) for t in target_roots])))
    return target_roots[0]

  def require_homogeneous_targets(self, accept_predicate, reject_predicate):
    """Ensures that there is no ambiguity in the context according to the given predicates.

    If any targets in the context satisfy the accept_predicate, and no targets satisfy the
    reject_predicate, returns the accepted targets.

    If no targets satisfy the accept_predicate, returns None.

    Otherwise throws TaskError.
    """
    if len(self.context.target_roots) == 0:
      raise TaskError('No target specified.')

    accepted = self.context.targets(accept_predicate)
    rejected = self.context.targets(reject_predicate)
    if len(accepted) == 0:
      # no targets were accepted, regardless of rejects
      return None
    elif len(rejected) == 0:
      # we have at least one accepted target, and no rejected targets
      return accepted
    else:
      # both accepted and rejected targets
      # TODO: once https://github.com/pantsbuild/pants/issues/425 lands, we should add
      # language-specific flags that would resolve the ambiguity here
      raise TaskError('Mutually incompatible targets specified: {} vs {} (and {} others)'
                      .format(accepted[0], rejected[0], len(accepted) + len(rejected) - 2))
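Relative to Example #6, this older TaskBase still partitions invalidation results (partition_size_hint, locally_changed_targets coloring) and writes results_dirs to the artifact cache inline at the end of invalidated(), rather than through the maybe_write_artifact callback.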
Example #8
class OptionsFingerprinterTest(TestBase):

  def setUp(self):
    super(OptionsFingerprinterTest, self).setUp()
    self.options_fingerprinter = OptionsFingerprinter(self.context().build_graph)

  def test_fingerprint_dict(self):
    d1 = {'b': 1, 'a': 2}
    d2 = {'a': 2, 'b': 1}
    d3 = {'a': 1, 'b': 2}
    fp1, fp2, fp3 = (self.options_fingerprinter.fingerprint(dict_option, d)
                     for d in (d1, d2, d3))
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_list(self):
    l1 = [1, 2, 3]
    l2 = [1, 3, 2]
    fp1, fp2 = (self.options_fingerprinter.fingerprint(list_option, l)
                     for l in (l1, l2))
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_target_spec(self):
    specs = [':t1', ':t2']
    payloads = [Payload() for i in range(2)]
    for i, (s, p) in enumerate(zip(specs, payloads)):
      p.add_field('foo', PrimitiveField(i))
      self.make_target(s, payload=p)
    s1, s2 = specs

    fp_spec = lambda spec: self.options_fingerprinter.fingerprint(target_option, spec)
    fp1 = fp_spec(s1)
    fp2 = fp_spec(s2)
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_target_spec_list(self):
    specs = [':t1', ':t2', ':t3']
    payloads = [Payload() for i in range(3)]
    for i, (s, p) in enumerate(zip(specs, payloads)):
      p.add_field('foo', PrimitiveField(i))
      self.make_target(s, payload=p)
    s1, s2, s3 = specs

    fp_specs = lambda specs: self.options_fingerprinter.fingerprint(target_option, specs)
    fp1 = fp_specs([s1, s2])
    fp2 = fp_specs([s2, s1])
    fp3 = fp_specs([s1, s3])
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_file(self):
    fp1, fp2, fp3 = (self.options_fingerprinter.fingerprint(file_option,
                                                            self.create_file(f, contents=c))
                     for (f, c) in (('foo/bar.config', 'blah blah blah'),
                                    ('foo/bar.config', 'meow meow meow'),
                                    ('spam/egg.config', 'blah blah blah')))
    self.assertNotEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)
    self.assertNotEquals(fp2, fp3)

  def test_fingerprint_file_outside_buildroot(self):
    with temporary_dir() as tmp:
      outside_buildroot = self.create_file(os.path.join(tmp, 'foobar'), contents='foobar')
      with self.assertRaises(ValueError):
        self.options_fingerprinter.fingerprint(file_option, outside_buildroot)

  def test_fingerprint_file_list(self):
    f1, f2, f3 = (self.create_file(f, contents=c) for (f, c) in
                  (('foo/bar.config', 'blah blah blah'),
                   ('foo/bar.config', 'meow meow meow'),
                   ('spam/egg.config', 'blah blah blah')))
    fp1 = self.options_fingerprinter.fingerprint(file_option, [f1, f2])
    fp2 = self.options_fingerprinter.fingerprint(file_option, [f2, f1])
    fp3 = self.options_fingerprinter.fingerprint(file_option, [f1, f3])
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_primitive(self):
    fp1, fp2 = (self.options_fingerprinter.fingerprint('', v) for v in ('foo', 5))
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_unset_bool(self):
    fp1 = self.options_fingerprinter.fingerprint(UnsetBool, UnsetBool)
    fp2 = self.options_fingerprinter.fingerprint(UnsetBool, UnsetBool)
    self.assertEqual(fp1, fp2)

  def test_fingerprint_dir(self):
    d1 = self.create_dir('a')
    d2 = self.create_dir('b')
    d3 = self.create_dir('c')

    f1, f2, f3, f4, f5 = (self.create_file(f, contents=c) for (f, c) in (
      ('a/bar/bar.config', 'blah blah blah'),
      ('a/foo/foo.config', 'meow meow meow'),
      ('b/foo/foo.config', 'meow meow meow'),
      ('b/bar/bar.config', 'blah blah blah'),
      ('c/bar/bar.config', 'blah meow blah')))
    dp1 = self.options_fingerprinter.fingerprint(dir_option, [d1])
    dp2 = self.options_fingerprinter.fingerprint(dir_option, [d1, d2])
    dp3 = self.options_fingerprinter.fingerprint(dir_option, [d2, d1])
    dp4 = self.options_fingerprinter.fingerprint(dir_option, [d3])

    self.assertEquals(dp1, dp1)
    self.assertEquals(dp2, dp2)
    self.assertNotEquals(dp1, dp3)
    self.assertNotEquals(dp1, dp4)
    self.assertNotEquals(dp2, dp3)
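Two details worth noticing in this expanded test class: lists of file_option values fingerprint order-insensitively (fp1 equals fp2 in test_fingerprint_file_list) while lists of dir_option values are order-sensitive (dp2 differs from dp3 in test_fingerprint_dir), and fingerprinting a file outside the build root raises ValueError.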
Example #10
class TaskBase(SubsystemClientMixin, Optionable, AbstractClass):
  """Defines a lifecycle that prepares a task for execution and provides the base machinery
  needed to execute it.

  Provides the base lifecycle methods that allow a task to interact with the command line, other
  tasks and the user.  The lifecycle is linear and run via the following sequence:
  1. register_options - declare options configurable via cmd-line flag or config file.
  2. product_types - declare the product types your task is capable of producing.
  3. alternate_target_roots - propose a different set of target roots to use than those specified
                              via the CLI for the active pants run.
  4. prepare - request any products needed from other tasks.
  5. __init__ - distill configuration into the information needed to execute.

  Provides access to the current run context for scoping work.

  Also provides the basic facilities for doing work efficiently including providing a work directory
  for scratch space on disk, an invalidator for checking which targets need work done on, and an
  artifact cache for re-using previously cached work.

  #TODO(John Sirois):  Lifecycle is currently split between TaskBase and Task and lifecycle
  (interface) and helpers (utility) are currently conflated.  Tease these apart and narrow the scope
  of the helpers.  Ideally console tasks don't inherit a workdir, invalidator or build cache for
  example.
  """
  options_scope_category = ScopeInfo.TASK

  # We set this explicitly on the synthetic subclass, so that it shares a stable name with
  # its superclass, which is not necessary for regular use, but can be convenient in tests.
  _stable_name = None

  @classmethod
  def stable_name(cls):
    """The stable name of this task type.

    We synthesize subclasses of the task types at runtime, and these synthesized subclasses
    may have random names (e.g., in tests), so this gives us a stable name to use across runs,
    e.g., in artifact cache references.
    """
    return cls._stable_name or cls._compute_stable_name()

  @classmethod
  def _compute_stable_name(cls):
    return '{}_{}'.format(cls.__module__, cls.__name__).replace('.', '_')

  @classmethod
  def global_subsystems(cls):
    """The global subsystems this task uses.

    A tuple of subsystem types.
    """
    return tuple()

  @classmethod
  def task_subsystems(cls):
    """The private, per-task subsystems this task uses.

    A tuple of subsystem types.
    """
    return (CacheSetup,)

  @classmethod
  def product_types(cls):
    """The list of products this Task produces. Set the product type(s) for this
    task, i.e. the product type(s) this task creates, e.g. ['classes'].

    By default, each task is considered as creating its own unique product type(s).
    Subclasses that create products should override this to specify their unique product type(s).
    """
    return []

  @classmethod
  def known_scope_infos(cls):
    """Yields ScopeInfo for all known scopes for this task, in no particular order."""
    # The task's own scope.
    yield cls.get_scope_info()
    # The scopes of any task-specific subsystems it uses.
    for dep in cls.subsystem_dependencies_iter():
      if not dep.is_global():
        yield dep.subsystem_cls.get_scope_info(subscope=dep.scope)

  @classmethod
  def supports_passthru_args(cls):
    """Subclasses may override to indicate that they can use passthru args."""
    return False

  @classmethod
  def _scoped_options(cls, options):
    return options[cls.options_scope]

  @classmethod
  def _alternate_target_roots(cls, options, address_mapper, build_graph):
    # Subclasses should not generally need to override this method.
    # TODO(John Sirois): Kill when killing GroupTask as part of RoundEngine parallelization.
    return cls.alternate_target_roots(cls._scoped_options(options), address_mapper, build_graph)

  @classmethod
  def alternate_target_roots(cls, options, address_mapper, build_graph):
    """Allows a Task to propose alternate target roots from those specified on the CLI.

    At most 1 unique proposal is allowed amongst all tasks involved in the run.  If more than 1
    unique list of target roots is proposed an error is raised during task scheduling.

    :returns list: The new target roots to use or None to accept the CLI specified target roots.
    """

  @classmethod
  def _prepare(cls, options, round_manager):
    # Subclasses should not generally need to override this method.
    # TODO(John Sirois): Kill when killing GroupTask as part of RoundEngine parallelization.
    return cls.prepare(cls._scoped_options(options), round_manager)

  @classmethod
  def prepare(cls, options, round_manager):
    """Prepares a task for execution.

    Called before execution and prior to any tasks that may be (indirectly) depended upon.

    Typically a task that requires products from other goals would register interest in those
    products here and then retrieve the requested product mappings when executed.
    """

  def __init__(self, context, workdir):
    """Subclass __init__ methods, if defined, *must* follow this idiom:

    class MyTask(Task):
      def __init__(self, *args, **kwargs):
        super(MyTask, self).__init__(*args, **kwargs)
        ...

    This allows us to change Task.__init__()'s arguments without
    changing every subclass. If the subclass does not need its own
    initialization, this method can (and should) be omitted entirely.
    """
    super(TaskBase, self).__init__()
    self.context = context
    self._workdir = workdir
    # TODO: It would be nice to use self.get_options().cache_key_gen_version here, because then
    # we could have a separate value for each scope if we really wanted to. However we can't
    # access per-task options in Task.__init__ because GroupTask.__init__ calls it with the
    # group task's scope, which isn't currently in the known scopes we generate options for.
    self._cache_key_generator = CacheKeyGenerator(
      self.context.options.for_global_scope().cache_key_gen_version)

    self._cache_key_errors = set()

    self._build_invalidator_dir = os.path.join(
      self.context.options.for_global_scope().pants_workdir,
      'build_invalidator',
      self.stable_name())

    self._cache_factory = CacheSetup.create_cache_factory_for_task(self)

    self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)
    self._fingerprint = None

  def get_options(self):
    """Returns the option values for this task's scope."""
    return self.context.options.for_scope(self.options_scope)

  def get_passthru_args(self):
    if not self.supports_passthru_args():
      raise TaskError('{0} does not support passthru args.'.format(self.stable_name()))
    else:
      return self.context.options.passthru_args_for_scope(self.options_scope)

  @property
  def workdir(self):
    """A scratch-space for this task that will be deleted by `clean-all`.

    It's not guaranteed that the workdir exists, just that no other task has been given this
    workdir path to use.
    """
    return self._workdir

  def _options_fingerprint(self, scope):
    pairs = self.context.options.get_fingerprintable_for_scope(scope)
    hasher = sha1()
    for (option_type, option_val) in pairs:
      fp = self._options_fingerprinter.fingerprint(option_type, option_val)
      if fp is not None:
        hasher.update(fp)
    return hasher.hexdigest()

  @property
  def fingerprint(self):
    """Returns a fingerprint for the identity of the task.

    A task fingerprint is composed of the options the task is currently running under.
    Useful for invalidating unchanging targets being executed beneath changing task
    options that affect outputted artifacts.

    A task's fingerprint is only valid after the task has been fully initialized.
    """
    if not self._fingerprint:
      hasher = sha1()
      hasher.update(self._options_fingerprint(self.options_scope))
      # TODO: this is not recursive, but should be: see #2739
      for dep in self.subsystem_dependencies_iter():
        hasher.update(self._options_fingerprint(dep.options_scope()))
      self._fingerprint = str(hasher.hexdigest())
    return self._fingerprint

  def artifact_cache_reads_enabled(self):
    return self._cache_factory.read_cache_available()

  def artifact_cache_writes_enabled(self):
    return self._cache_factory.write_cache_available()

  def invalidate(self):
    """Invalidates all targets for this task."""
    BuildInvalidator(self._build_invalidator_dir).force_invalidate_all()

  def create_cache_manager(self, invalidate_dependents, fingerprint_strategy=None):
    """Creates a cache manager that can be used to invalidate targets on behalf of this task.

    Use this if you need to check for invalid targets but can't use the contextmanager created by
    invalidated(), e.g., because you don't want to mark the targets as valid when done.

    invalidate_dependents:   If True then any targets depending on changed targets are invalidated.
    fingerprint_strategy:    A FingerprintStrategy instance, which can do per-task, finer-grained
                             fingerprinting of a given Target.
    """
    return InvalidationCacheManager(self._cache_key_generator,
                                    self._build_invalidator_dir,
                                    invalidate_dependents,
                                    fingerprint_strategy=fingerprint_strategy,
                                    invalidation_report=self.context.invalidation_report,
                                    task_name=type(self).__name__)

  @property
  def cache_target_dirs(self):
    """Whether to cache files in VersionedTarget's results_dir after exiting an invalidated block.

    Subclasses may override this method to return True if they wish to use this style
    of "automated" caching, where each VersionedTarget is given an associated results directory,
    which will automatically be uploaded to the cache. Tasks should place the output files
    for each VersionedTarget in said results directory. It is highly suggested to follow this
    schema for caching, rather than manually making updates to the artifact cache.
    """
    return False

  @property
  def incremental(self):
    """Whether this Task implements incremental building of individual targets.

    Incremental tasks with `cache_target_dirs` set will have the results_dir of the previous build
    for a target cloned into the results_dir for the current build (where possible). This
    copy-on-write behaviour allows for immutability of the results_dir once a target has been
    marked valid.
    """
    return False

  @property
  def cache_incremental(self):
    """For incremental tasks, indicates whether the results of incremental builds should be cached.

    Deterministic per-target incremental compilation is a relatively difficult thing to implement,
    so this property provides an escape hatch to avoid caching things in that riskier case.
    """
    return False

  @contextmanager
  def invalidated(self,
                  targets,
                  invalidate_dependents=False,
                  partition_size_hint=sys.maxint,
                  silent=False,
                  locally_changed_targets=None,
                  fingerprint_strategy=None,
                  topological_order=False):
    """Checks targets for invalidation, first checking the artifact cache.

    Subclasses call this to figure out what to work on.

    :param targets:               The targets to check for changes.
    :param invalidate_dependents: If True then any targets depending on changed targets are
                                  invalidated.
    :param partition_size_hint:   Each VersionedTargetSet in the yielded list will represent targets
                                  containing roughly this number of source files, if possible. Set to
                                  sys.maxint for a single VersionedTargetSet. Set to 0 for one
                                  VersionedTargetSet per target. It is up to the caller to do the
                                  right thing with whatever partitioning it asks for.
    :param locally_changed_targets: Targets that we've edited locally. If specified, and there
                                  aren't too many of them, we keep these in separate partitions from
                                  other targets, as these are more likely to have build errors, and
                                  so to be rebuilt over and over, and partitioning them separately
                                  is a performance win.
    :param fingerprint_strategy:  A FingerprintStrategy instance, which can do per-task,
                                  finer-grained fingerprinting of a given Target.

    If no exceptions are thrown by work in the block, the build cache is updated for the targets.
    Note: the artifact cache is not updated. That must be done manually.

    :returns: Yields an InvalidationCheck object reflecting the (partitioned) targets.
    :rtype: InvalidationCheck
    """
    # TODO(benjy): Compute locally_changed_targets here instead of passing it in? We currently pass
    # it in because JvmCompile already has the source->target mapping for other reasons, and also
    # to selectively enable this feature.
    fingerprint_strategy = fingerprint_strategy or TaskIdentityFingerprintStrategy(self)
    cache_manager = self.create_cache_manager(invalidate_dependents,
                                              fingerprint_strategy=fingerprint_strategy)
    # We separate locally-modified targets from others by coloring them differently.
    # This can be a performance win, because these targets are more likely to be iterated
    # over, and this preserves "chunk stability" for them.
    colors = {}

    # But we only do so if there aren't too many, or this optimization will backfire.
    locally_changed_target_limit = 10

    if locally_changed_targets and len(locally_changed_targets) < locally_changed_target_limit:
      for t in targets:
        if t in locally_changed_targets:
          colors[t] = 'locally_changed'
        else:
          colors[t] = 'not_locally_changed'
    invalidation_check = cache_manager.check(targets, partition_size_hint, colors,
                                             topological_order=topological_order)

    if invalidation_check.invalid_vts and self.artifact_cache_reads_enabled():
      with self.context.new_workunit('cache'):
        cached_vts, uncached_vts, uncached_causes = \
          self.check_artifact_cache(self.check_artifact_cache_for(invalidation_check))
      if cached_vts:
        cached_targets = [vt.target for vt in cached_vts]
        self.context.run_tracker.artifact_cache_stats.add_hits(cache_manager.task_name,
                                                               cached_targets)
        if not silent:
          self._report_targets('Using cached artifacts for ', cached_targets, '.')
      if uncached_vts:
        uncached_targets = [vt.target for vt in uncached_vts]
        self.context.run_tracker.artifact_cache_stats.add_misses(cache_manager.task_name,
                                                                 uncached_targets,
                                                                 uncached_causes)
        if not silent:
          self._report_targets('No cached artifacts for ', uncached_targets, '.')
      # Now that we've checked the cache, re-partition whatever is still invalid.
      invalidation_check = \
        InvalidationCheck(invalidation_check.all_vts, uncached_vts, partition_size_hint, colors)

    self._maybe_create_results_dirs(invalidation_check.all_vts)

    if not silent:
      targets = []
      num_invalid_partitions = len(invalidation_check.invalid_vts_partitioned)
      for vt in invalidation_check.invalid_vts_partitioned:
        targets.extend(vt.targets)

      if len(targets):
        msg_elements = ['Invalidated ',
                        items_to_report_element([t.address.reference() for t in targets], 'target')]
        if num_invalid_partitions > 1:
          msg_elements.append(' in {} target partitions'.format(num_invalid_partitions))
        msg_elements.append('.')
        self.context.log.info(*msg_elements)

    invalidation_report = self.context.invalidation_report
    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='pre-check')

    # Yield the result, and then mark the targets as up to date.
    yield invalidation_check

    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='post-check')
    for vt in invalidation_check.invalid_vts:
      vt.update()  # In case the caller doesn't update.

    write_to_cache = (self.cache_target_dirs
                      and self.artifact_cache_writes_enabled()
                      and invalidation_check.invalid_vts)
    if write_to_cache:
      pairs = []
      for vt in invalidation_check.invalid_vts:
        if self._should_cache(vt):
          pairs.append((vt, [vt.results_dir]))
      self.update_artifact_cache(pairs)

  def _should_cache(self, vt):
    """Returns True if the given vt should be written to a cache (if configured)."""
    if vt.target.has_label('no_cache'):
      return False
    elif not vt.is_incremental or self.cache_incremental:
      return True
    else:
      return False

  def _maybe_create_results_dirs(self, vts):
    """If `cache_target_dirs`, create results_dirs for the given versioned targets."""
    if self.cache_target_dirs:
      for vt in vts:
        vt.create_results_dir(self.workdir, allow_incremental=self.incremental)

  def check_artifact_cache_for(self, invalidation_check):
    """Decides which VTS to check the artifact cache for.

    By default we check for each invalid target. Can be overridden, e.g., to
    instead check only for a single artifact for the entire target set.
    """
    return invalidation_check.invalid_vts

  def check_artifact_cache(self, vts):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a tuple (cached, uncached, uncached_causes) of VersionedTargets that were
    satisfied/unsatisfied from the cache. Uncached VTS are also attached with their
    causes for the miss: `False` indicates a legit miss while `UnreadableArtifact`
    is due to either local or remote cache failures.
    """
    return self.do_check_artifact_cache(vts)

  def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a pair (cached, uncached) of VersionedTargets that were
    satisfied/unsatisfied from the cache.
    """
    if not vts:
      return [], [], []

    read_cache = self._cache_factory.get_read_cache()
    items = [(read_cache, vt.cache_key, vt.results_dir if vt.has_results_dir else None)
             for vt in vts]

    res = self.context.subproc_map(call_use_cached_files, items)

    self._maybe_create_results_dirs(vts)

    cached_vts = []
    uncached_vts = []
    uncached_causes = []

    # Note that while the input vts may represent multiple targets (for tasks that override
    # check_artifact_cache_for), the ones we return must represent single targets.
    # Once flattened, cached/uncached vts are in separate lists. Each uncached vts is paired
    # with why it was missed, for stat reporting purposes.
    for vt, was_in_cache in zip(vts, res):
      if was_in_cache:
        cached_vts.extend(vt.versioned_targets)
      else:
        uncached_vts.extend(vt.versioned_targets)
        uncached_causes.extend(repeat(was_in_cache, len(vt.versioned_targets)))
        if isinstance(was_in_cache, UnreadableArtifact):
          self._cache_key_errors.update(was_in_cache.key)

    if post_process_cached_vts:
      post_process_cached_vts(cached_vts)
    for vt in cached_vts:
      vt.update()
    return cached_vts, uncached_vts, uncached_causes

  def update_artifact_cache(self, vts_artifactfiles_pairs):
    """Write to the artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of absolute paths to artifacts for the VersionedTargetSet.
    """
    update_artifact_cache_work = self._get_update_artifact_cache_work(vts_artifactfiles_pairs)
    if update_artifact_cache_work:
      self.context.submit_background_work_chain([update_artifact_cache_work],
                                                parent_workunit_name='cache')

  def _get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
    """Create a Work instance to update an artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of paths to artifacts for the VersionedTargetSet.
    """
    cache = self._cache_factory.get_write_cache()
    if cache:
      if len(vts_artifactfiles_pairs) == 0:
        return None
      # Do some reporting.
      targets = set()
      for vts, _ in vts_artifactfiles_pairs:
        targets.update(vts.targets)
      self._report_targets('Caching artifacts for ', list(targets), '.')

      always_overwrite = self._cache_factory.overwrite()

      # Cache the artifacts.
      args_tuples = []
      for vts, artifactfiles in vts_artifactfiles_pairs:
        overwrite = always_overwrite or vts.cache_key in self._cache_key_errors
        args_tuples.append((cache, vts.cache_key, artifactfiles, overwrite))

      return Work(lambda x: self.context.subproc_map(call_insert, x), [(args_tuples,)], 'insert')
    else:
      return None

  def _report_targets(self, prefix, targets, suffix):
    self.context.log.info(
      prefix,
      items_to_report_element([t.address.reference() for t in targets], 'target'),
      suffix)

  def require_single_root_target(self):
    """If a single target was specified on the cmd line, returns that target.

    Otherwise throws TaskError.
    """
    target_roots = self.context.target_roots
    if len(target_roots) == 0:
      raise TaskError('No target specified.')
    elif len(target_roots) > 1:
      raise TaskError('Multiple targets specified: {}'
                      .format(', '.join([repr(t) for t in target_roots])))
    return target_roots[0]
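
For orientation, the lifecycle above maps onto a subclass roughly as follows. This is a hedged, minimal sketch rather than pants source: ExampleTask, the 'example' product type and do_work_for are illustrative names, and the import path is an assumption.

from pants.task.task import Task  # assumed import path


class ExampleTask(Task):
  """A hypothetical task wiring the TaskBase lifecycle hooks together."""

  @classmethod
  def register_options(cls, register):
    super(ExampleTask, cls).register_options(register)
    register('--example-flag', default='off', help='An illustrative option (step 1).')

  @classmethod
  def product_types(cls):
    return ['example']  # step 2: the products this task can produce

  @classmethod
  def prepare(cls, options, round_manager):
    round_manager.require_data('classes')  # step 4: request products from upstream tasks

  def execute(self):
    # Consumes the invalidation machinery distilled in TaskBase.__init__ (step 5).
    with self.invalidated(self.context.targets(),
                          invalidate_dependents=True) as invalidation_check:
      for vt in invalidation_check.invalid_vts:
        self.do_work_for(vt.target)  # only redo work for invalidated targets

  def do_work_for(self, target):
    pass  # placeholder for the task's real work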
Example #11
  def setUp(self):
    super(OptionsFingerprinterTest, self).setUp()
    self.options_fingerprinter = OptionsFingerprinter(self.context().build_graph)
Example #12
  def options_fingerprint(self):
    return OptionsFingerprinter.combined_options_fingerprint_for_scope(
      GLOBAL_SCOPE,
      self._bootstrap_options,
      fingerprint_key='daemon',
      invert=True)
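
Here combined_options_fingerprint_for_scope hashes a selection of the fingerprintable options for the global scope in one call. A hedged sketch of how such fingerprints might be compared follows; daemon_options_changed and the two Options arguments are hypothetical, the import paths are assumptions, and reading fingerprint_key/invert as selecting options by a registration kwarg is inferred, not confirmed by this snippet.

from pants.option.options_fingerprinter import OptionsFingerprinter  # assumed import path
from pants.option.scope import GLOBAL_SCOPE  # assumed import path


def daemon_options_changed(old_options, new_options):
  # Hypothetical helper: recompute the combined fingerprint for each Options
  # instance and report whether any selected option value differs.
  def fp(options):
    return OptionsFingerprinter.combined_options_fingerprint_for_scope(
      GLOBAL_SCOPE, options, fingerprint_key='daemon', invert=True)
  return fp(old_options) != fp(new_options)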
Example #13
File: task.py Project: lahosken/pants
class TaskBase(SubsystemClientMixin, Optionable, AbstractClass):
  """Defines a lifecycle that prepares a task for execution and provides the base machinery
  needed to execute it.

  Provides the base lifecycle methods that allow a task to interact with the command line, other
  tasks and the user.  The lifecycle is linear and run via the following sequence:
  1. register_options - declare options configurable via cmd-line flag or config file.
  2. product_types - declare the product types your task is capable of producing.
  3. alternate_target_roots - propose a different set of target roots to use than those specified
                              via the CLI for the active pants run.
  4. prepare - request any products needed from other tasks.
  5. __init__ - distill configuration into the information needed to execute.

  Provides access to the current run context for scoping work.

  Also provides the basic facilities for doing work efficiently including providing a work directory
  for scratch space on disk, an invalidator for checking which targets need work done on, and an
  artifact cache for re-using previously cached work.

  #TODO(John Sirois):  Lifecycle is currently split between TaskBase and Task and lifecycle
  (interface) and helpers (utility) are currently conflated.  Tease these apart and narrow the scope
  of the helpers.  Ideally console tasks don't inherit a workdir, invalidator or build cache for
  example.
  """
  options_scope_category = ScopeInfo.TASK

  # We set this explicitly on the synthetic subclass, so that it shares a stable name with
  # its superclass, which is not necessary for regular use, but can be convenient in tests.
  _stable_name = None

  @classmethod
  def implementation_version(cls):
    """
    :API: public
    """
    return [('TaskBase', 2)]

  @classmethod
  @memoized_method
  def implementation_version_str(cls):
    return '.'.join(['_'.join(map(str, x)) for x in cls.implementation_version()])

  @classmethod
  def stable_name(cls):
    """The stable name of this task type.

    We synthesize subclasses of the task types at runtime, and these synthesized subclasses
    may have random names (e.g., in tests), so this gives us a stable name to use across runs,
    e.g., in artifact cache references.
    """
    return cls._stable_name or cls._compute_stable_name()

  @classmethod
  def _compute_stable_name(cls):
    return '{}_{}'.format(cls.__module__, cls.__name__).replace('.', '_')

  @classmethod
  def subsystem_dependencies(cls):
    return super(TaskBase, cls).subsystem_dependencies() + (CacheSetup.scoped(cls),)

  @classmethod
  def product_types(cls):
    """The list of products this Task produces. Set the product type(s) for this
    task i.e. the product type(s) this task creates e.g ['classes'].

    By default, each task is considered as creating a unique product type(s).
    Subclasses that create products, should override this to specify their unique product type(s).

    :API: public
    """
    return []

  @classmethod
  def known_scope_infos(cls):
    """Yields ScopeInfo for all known scopes for this task, in no particular order."""
    # The task's own scope.
    yield cls.get_scope_info()
    # The scopes of any task-specific subsystems it uses.
    for dep in cls.subsystem_dependencies_iter():
      if not dep.is_global():
        yield dep.subsystem_cls.get_scope_info(subscope=dep.scope)

  @classmethod
  def supports_passthru_args(cls):
    """Subclasses may override to indicate that they can use passthru args.

    :API: public
    """
    return False

  @classmethod
  def _scoped_options(cls, options):
    return options[cls.options_scope]

  @classmethod
  def get_alternate_target_roots(cls, options, address_mapper, build_graph):
    # Subclasses should not generally need to override this method.
    return cls.alternate_target_roots(cls._scoped_options(options), address_mapper, build_graph)

  @classmethod
  def alternate_target_roots(cls, options, address_mapper, build_graph):
    """Allows a Task to propose alternate target roots from those specified on the CLI.

    At most 1 unique proposal is allowed amongst all tasks involved in the run.  If more than 1
    unique list of target roots is proposed an error is raised during task scheduling.

    :API: public

    :returns list: The new target roots to use or None to accept the CLI specified target roots.
    """

  @classmethod
  def invoke_prepare(cls, options, round_manager):
    # Subclasses should not generally need to override this method.
    return cls.prepare(cls._scoped_options(options), round_manager)

  @classmethod
  def prepare(cls, options, round_manager):
    """Prepares a task for execution.

    Called before execution and prior to any tasks that may be (indirectly) depended upon.

    Typically a task that requires products from other goals would register interest in those
    products here and then retrieve the requested product mappings when executed.

    :API: public
    """

  def __init__(self, context, workdir):
    """Subclass __init__ methods, if defined, *must* follow this idiom:

    class MyTask(Task):
      def __init__(self, *args, **kwargs):
        super(MyTask, self).__init__(*args, **kwargs)
        ...

    This allows us to change Task.__init__()'s arguments without
    changing every subclass. If the subclass does not need its own
    initialization, this method can (and should) be omitted entirely.

    :API: public
    """
    super(TaskBase, self).__init__()
    self.context = context
    self._workdir = workdir

    self._cache_key_errors = set()

    self._build_invalidator_dir = os.path.join(
      self.context.options.for_global_scope().pants_workdir,
      'build_invalidator',
      self.stable_name())

    self._cache_factory = CacheSetup.create_cache_factory_for_task(self)

    self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)

  def get_options(self):
    """Returns the option values for this task's scope.

    :API: public
    """
    return self.context.options.for_scope(self.options_scope)

  def get_passthru_args(self):
    """
    :API: public
    """
    if not self.supports_passthru_args():
      raise TaskError('{0} does not support passthru args.'.format(self.stable_name()))
    else:
      return self.context.options.passthru_args_for_scope(self.options_scope)

  @property
  def workdir(self):
    """A scratch-space for this task that will be deleted by `clean-all`.

    It's not guaranteed that the workdir exists, just that no other task has been given this
    workdir path to use.

    :API: public
    """
    return self._workdir

  def _options_fingerprint(self, scope):
    pairs = self.context.options.get_fingerprintable_for_scope(scope)
    hasher = sha1()
    for (option_type, option_val) in pairs:
      fp = self._options_fingerprinter.fingerprint(option_type, option_val)
      if fp is not None:
        hasher.update(fp)
    return hasher.hexdigest()

  @memoized_property
  def fingerprint(self):
    """Returns a fingerprint for the identity of the task.

    A task fingerprint is composed of the options the task is currently running under.
    Useful for invalidating unchanging targets being executed beneath changing task
    options that affect outputted artifacts.

    A task's fingerprint is only valid after the task has been fully initialized.
    """
    hasher = sha1()
    hasher.update(self._options_fingerprint(self.options_scope))
    hasher.update(self.implementation_version_str())
    # TODO: this is not recursive, but should be: see #2739
    for dep in self.subsystem_dependencies_iter():
      hasher.update(self._options_fingerprint(dep.options_scope()))
    return str(hasher.hexdigest())

  def artifact_cache_reads_enabled(self):
    return self._cache_factory.read_cache_available()

  def artifact_cache_writes_enabled(self):
    return self._cache_factory.write_cache_available()

  def invalidate(self):
    """Invalidates all targets for this task."""
    BuildInvalidator(self._build_invalidator_dir).force_invalidate_all()

  @property
  def create_target_dirs(self):
    """Whether to create a results_dir per VersionedTarget in the workdir of the Task.

    This defaults to the value of `self.cache_target_dirs` (as caching them requires
    creating them), but may be overridden independently to create the dirs without caching
    them.

    :API: public
    """
    return self.cache_target_dirs or False

  @property
  def cache_target_dirs(self):
    """Whether to cache files in VersionedTarget's results_dir after exiting an invalidated block.

    Subclasses may override this method to return True if they wish to use this style
    of "automated" caching, where each VersionedTarget is given an associated results directory,
    which will automatically be uploaded to the cache. Tasks should place the output files
    for each VersionedTarget in said results directory. It is highly suggested to follow this
    schema for caching, rather than manually making updates to the artifact cache.

    :API: public
    """
    return False

  @property
  def incremental(self):
    """Whether this Task implements incremental building of individual targets.

    Incremental tasks with `cache_target_dirs` set will have the results_dir of the previous build
    for a target cloned into the results_dir for the current build (where possible). This
    copy-on-write behaviour allows for immutability of the results_dir once a target has been
    marked valid.

    :API: public
    """
    return False

  @property
  def cache_incremental(self):
    """For incremental tasks, indicates whether the results of incremental builds should be cached.

    Deterministic per-target incremental compilation is a relatively difficult thing to implement,
    so this property provides an escape hatch to avoid caching things in that riskier case.

    :API: public
    """
    return False

  @contextmanager
  def invalidated(self,
                  targets,
                  invalidate_dependents=False,
                  silent=False,
                  fingerprint_strategy=None,
                  topological_order=False):
    """Checks targets for invalidation, first checking the artifact cache.

    Subclasses call this to figure out what to work on.

    :API: public

    :param targets: The targets to check for changes.
    :param invalidate_dependents: If True then any targets depending on changed targets are
                                  invalidated.
    :param silent: If true, suppress logging information about target invalidation.
    :param fingerprint_strategy: A FingerprintStrategy instance, which can do per-task,
                                 finer-grained fingerprinting of a given Target.
    :param topological_order: Whether to invalidate in dependency order.

    If no exceptions are thrown by work in the block, the build cache is updated for the targets.
    Note: the artifact cache is not updated. That must be done manually.

    :returns: Yields an InvalidationCheck object reflecting the targets.
    :rtype: InvalidationCheck
    """

    cache_key_generator = CacheKeyGenerator(
      self.context.options.for_global_scope().cache_key_gen_version,
      self.fingerprint)
    cache_manager = InvalidationCacheManager(self.workdir,
                                             cache_key_generator,
                                             self._build_invalidator_dir,
                                             invalidate_dependents,
                                             fingerprint_strategy=fingerprint_strategy,
                                             invalidation_report=self.context.invalidation_report,
                                             task_name=type(self).__name__,
                                             task_version=self.implementation_version_str(),
                                             artifact_write_callback=self.maybe_write_artifact)

    invalidation_check = cache_manager.check(targets, topological_order=topological_order)

    self._maybe_create_results_dirs(invalidation_check.all_vts)

    if invalidation_check.invalid_vts and self.artifact_cache_reads_enabled():
      with self.context.new_workunit('cache'):
        cached_vts, uncached_vts, uncached_causes = \
          self.check_artifact_cache(self.check_artifact_cache_for(invalidation_check))
      if cached_vts:
        cached_targets = [vt.target for vt in cached_vts]
        self.context.run_tracker.artifact_cache_stats.add_hits(cache_manager.task_name,
                                                               cached_targets)
        if not silent:
          self._report_targets('Using cached artifacts for ', cached_targets, '.')
      if uncached_vts:
        uncached_targets = [vt.target for vt in uncached_vts]
        self.context.run_tracker.artifact_cache_stats.add_misses(cache_manager.task_name,
                                                                 uncached_targets,
                                                                 uncached_causes)
        if not silent:
          self._report_targets('No cached artifacts for ', uncached_targets, '.')
      # Now that we've checked the cache, re-partition whatever is still invalid.
      invalidation_check = \
        InvalidationCheck(invalidation_check.all_vts, uncached_vts)

    if not silent:
      targets = []
      for vt in invalidation_check.invalid_vts:
        targets.extend(vt.targets)

      if len(targets):
        msg_elements = ['Invalidated ',
                        items_to_report_element([t.address.reference() for t in targets], 'target'),
                        '.']
        self.context.log.info(*msg_elements)

    invalidation_report = self.context.invalidation_report
    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid,
                                    phase='pre-check')

    # Cache has been checked to create the full list of invalid VTs.
    # Only copy previous_results for this subset of VTs.
    for vts in invalidation_check.invalid_vts:
      if self.incremental:
        vts.copy_previous_results(self.workdir)

    # Yield the result, and then mark the targets as up to date.
    yield invalidation_check

    if invalidation_report:
      for vts in invalidation_check.all_vts:
        invalidation_report.add_vts(cache_manager, vts.targets, vts.cache_key, vts.valid, phase='post-check')

    for vt in invalidation_check.invalid_vts:
      vt.update()

    # Background work to clean up previous builds.
    if self.context.options.for_global_scope().workdir_max_build_entries is not None:
      self._launch_background_workdir_cleanup(invalidation_check.all_vts)

  def maybe_write_artifact(self, vt):
    if self._should_cache_target_dir(vt):
      self.update_artifact_cache([(vt, [vt.current_results_dir])])

  def _launch_background_workdir_cleanup(self, vts):
    workdir_build_cleanup_job = Work(self._cleanup_workdir_stale_builds, [(vts,)], 'workdir_build_cleanup')
    self.context.submit_background_work_chain([workdir_build_cleanup_job])

  def _cleanup_workdir_stale_builds(self, vts):
    # workdir_max_build_entries has been assured of not None before invoking this method.
    max_entries_per_target = max(2, self.context.options.for_global_scope().workdir_max_build_entries)
    for vt in vts:
      live_dirs = list(vt.live_dirs())
      if not live_dirs:
        continue
      root_dir = os.path.dirname(vt.results_dir)
      safe_rm_oldest_items_in_dir(root_dir, max_entries_per_target, excludes=live_dirs)

  def _should_cache_target_dir(self, vt):
    """Return true if the given vt should be written to a cache (if configured)."""
    return (
      self.cache_target_dirs and
      not vt.target.has_label('no_cache') and
      (not vt.is_incremental or self.cache_incremental) and
      self.artifact_cache_writes_enabled()
    )

  def _maybe_create_results_dirs(self, vts):
    """If `cache_target_dirs`, create results_dirs for the given versioned targets."""
    if self.create_target_dirs:
      for vt in vts:
        vt.create_results_dir()

  def check_artifact_cache_for(self, invalidation_check):
    """Decides which VTS to check the artifact cache for.

    By default we check for each invalid target. Can be overridden, e.g., to
    instead check only for a single artifact for the entire target set.
    """
    return invalidation_check.invalid_vts

  def check_artifact_cache(self, vts):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a tuple (cached, uncached, uncached_causes) of VersionedTargets that were
    satisfied/unsatisfied from the cache. Uncached VTS are also attached with their
    causes for the miss: `False` indicates a legit miss while `UnreadableArtifact`
    is due to either local or remote cache failures.
    """
    return self.do_check_artifact_cache(vts)

  def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
    """Checks the artifact cache for the specified list of VersionedTargetSets.

    Returns a pair (cached, uncached) of VersionedTargets that were
    satisfied/unsatisfied from the cache.
    """
    if not vts:
      return [], [], []

    read_cache = self._cache_factory.get_read_cache()
    items = [(read_cache, vt.cache_key, vt.current_results_dir if self.cache_target_dirs else None)
             for vt in vts]
    res = self.context.subproc_map(call_use_cached_files, items)

    cached_vts = []
    uncached_vts = []
    uncached_causes = []

    # Note that while the input vts may represent multiple targets (for tasks that override
    # check_artifact_cache_for), the ones we return must represent single targets.
    # Once flattened, cached/uncached vts are in separate lists. Each uncached vts is paired
    # with why it was missed, for stat reporting purposes.
    for vt, was_in_cache in zip(vts, res):
      if was_in_cache:
        cached_vts.extend(vt.versioned_targets)
      else:
        uncached_vts.extend(vt.versioned_targets)
        uncached_causes.extend(repeat(was_in_cache, len(vt.versioned_targets)))
        if isinstance(was_in_cache, UnreadableArtifact):
          self._cache_key_errors.update(was_in_cache.key)

    if post_process_cached_vts:
      post_process_cached_vts(cached_vts)
    for vt in cached_vts:
      vt.update()
    return cached_vts, uncached_vts, uncached_causes

  def update_artifact_cache(self, vts_artifactfiles_pairs):
    """Write to the artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of absolute paths to artifacts for the VersionedTargetSet.
    """
    update_artifact_cache_work = self._get_update_artifact_cache_work(vts_artifactfiles_pairs)
    if update_artifact_cache_work:
      self.context.submit_background_work_chain([update_artifact_cache_work],
                                                parent_workunit_name='cache')

  def _get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
    """Create a Work instance to update an artifact cache, if we're configured to.

    vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
      - vts is a single VersionedTargetSet.
      - artifactfiles is a list of paths to artifacts for the VersionedTargetSet.
    """
    cache = self._cache_factory.get_write_cache()
    if cache:
      if len(vts_artifactfiles_pairs) == 0:
        return None
      # Do some reporting.
      targets = set()
      for vts, _ in vts_artifactfiles_pairs:
        targets.update(vts.targets)

      self._report_targets(
        'Caching artifacts for ',
        list(targets),
        '.',
        logger=self.context.log.debug,
      )

      always_overwrite = self._cache_factory.overwrite()

      # Cache the artifacts.
      args_tuples = []
      for vts, artifactfiles in vts_artifactfiles_pairs:
        overwrite = always_overwrite or vts.cache_key in self._cache_key_errors
        args_tuples.append((cache, vts.cache_key, artifactfiles, overwrite))

      return Work(lambda x: self.context.subproc_map(call_insert, x), [(args_tuples,)], 'insert')
    else:
      return None

  def _report_targets(self, prefix, targets, suffix, logger=None):
    logger = logger or self.context.log.info
    logger(
      prefix,
      items_to_report_element([t.address.reference() for t in targets], 'target'),
      suffix,
    )

  def require_single_root_target(self):
    """If a single target was specified on the cmd line, returns that target.

    Otherwise throws TaskError.

    :API: public
    """
    target_roots = self.context.target_roots
    if len(target_roots) == 0:
      raise TaskError('No target specified.')
    elif len(target_roots) > 1:
      raise TaskError('Multiple targets specified: {}'
                      .format(', '.join([repr(t) for t in target_roots])))
    return target_roots[0]

  def determine_target_roots(self, goal_name, predicate=None):
    """Helper for tasks that scan for default target roots.

    :param string goal_name: The goal name to use for any warning emissions.
    :param callable predicate: The predicate to pass to `context.scan().targets(predicate=X)`.
    """
    if not self.context.target_roots and not self.get_options().enable_v2_engine:
      self.context.log.warn(
        'The behavior of `./pants {0}` (no explicit targets) will soon become a no-op. '
        'To remove this warning, please specify one or more explicit target specs (e.g. '
        '`./pants {0} ::`).'.format(goal_name))
      # For the v1 path, continue the behavior of e.g. `./pants list` implies `./pants list ::`.
      return self.context.scan().targets(predicate=predicate)

    # For the v2 path, e.g. `./pants list` is a functional no-op. This matches the v2 mode behavior
    # of e.g. `./pants --changed-parent=HEAD list` (w/ no changes) returning an empty result.
    return self.context.target_roots
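
As a companion to the cache_target_dirs docstring above, here is a hedged sketch of the "automated" caching style it describes; ArchiveTask and its output file name are illustrative, and the import path is an assumption.

import os

from pants.task.task import Task  # assumed import path


class ArchiveTask(Task):
  """Hypothetical task opting into per-target results dirs and automatic cache writes."""

  @property
  def cache_target_dirs(self):
    return True  # each VersionedTarget gets a results_dir, uploaded via maybe_write_artifact

  def execute(self):
    with self.invalidated(self.context.targets()) as invalidation_check:
      for vt in invalidation_check.invalid_vts:
        # Anything written into vt.results_dir is what gets cached for this target.
        with open(os.path.join(vt.results_dir, 'archive.txt'), 'w') as f:
          f.write('contents for {}\n'.format(vt.target.address.spec))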
Example #14
class OptionsFingerprinterTest(BaseTest):

  def setUp(self):
    super(OptionsFingerprinterTest, self).setUp()
    self.options_fingerprinter = OptionsFingerprinter(self.context().build_graph)

  def test_fingerprint_dict(self):
    d1 = {'b': 1, 'a': 2}
    d2 = {'a': 2, 'b': 1}
    d3 = {'a': 1, 'b': 2}
    fp1, fp2, fp3 = (self.options_fingerprinter.fingerprint(dict_option, d)
                     for d in (d1, d2, d3))
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_list(self):
    l1 = [1, 2, 3]
    l2 = [1, 3, 2]
    fp1, fp2 = (self.options_fingerprinter.fingerprint(list_option, l)
                for l in (l1, l2))
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_target_spec(self):
    specs = [':t1', ':t2']
    payloads = [Payload() for i in range(2)]
    for i, (s, p) in enumerate(zip(specs, payloads)):
      p.add_field('foo', PrimitiveField(i))
      self.make_target(s, payload=p)
    s1, s2 = specs

    fp_spec = lambda spec: self.options_fingerprinter.fingerprint(target_option, spec)
    fp1 = fp_spec(s1)
    fp2 = fp_spec(s2)
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_target_spec_list(self):
    specs = [':t1', ':t2', ':t3']
    payloads = [Payload() for i in range(3)]
    for i, (s, p) in enumerate(zip(specs, payloads)):
      p.add_field('foo', PrimitiveField(i))
      self.make_target(s, payload=p)
    s1, s2, s3 = specs

    fp_specs = lambda specs: self.options_fingerprinter.fingerprint(target_option, specs)
    fp1 = fp_specs([s1, s2])
    fp2 = fp_specs([s2, s1])
    fp3 = fp_specs([s1, s3])
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_file(self):
    fp1, fp2, fp3 = (self.options_fingerprinter.fingerprint(file_option,
                                                            self.create_file(f, contents=c))
                     for (f, c) in (('foo/bar.config', 'blah blah blah'),
                                    ('foo/bar.config', 'meow meow meow'),
                                    ('spam/egg.config', 'blah blah blah')))
    self.assertNotEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)
    self.assertNotEquals(fp2, fp3)

  def test_fingerprint_file_outside_buildroot(self):
    with temporary_dir() as tmp:
      outside_buildroot = self.create_file(os.path.join(tmp, 'foobar'), contents='foobar')
      with self.assertRaises(ValueError):
        self.options_fingerprinter.fingerprint(file_option, outside_buildroot)

  def test_fingerprint_file_list(self):
    f1, f2, f3 = (self.create_file(f, contents=c) for (f, c) in
                  (('foo/bar.config', 'blah blah blah'),
                   ('foo/bar.config', 'meow meow meow'),
                   ('spam/egg.config', 'blah blah blah')))
    fp1 = self.options_fingerprinter.fingerprint(file_option, [f1, f2])
    fp2 = self.options_fingerprinter.fingerprint(file_option, [f2, f1])
    fp3 = self.options_fingerprinter.fingerprint(file_option, [f1, f3])
    self.assertEquals(fp1, fp2)
    self.assertNotEquals(fp1, fp3)

  def test_fingerprint_primitive(self):
    fp1, fp2 = (self.options_fingerprinter.fingerprint('', v) for v in ('foo', 5))
    self.assertNotEquals(fp1, fp2)

  def test_fingerprint_unset_bool(self):
    fp1 = self.options_fingerprinter.fingerprint(UnsetBool, UnsetBool)
    fp2 = self.options_fingerprinter.fingerprint(UnsetBool, UnsetBool)
    self.assertEqual(fp1, fp2)

  def test_fingerprint_dir(self):
    d1 = self.create_dir('a')
    d2 = self.create_dir('b')
    d3 = self.create_dir('c')

    f1, f2, f3, f4, f5 = (self.create_file(f, contents=c) for (f, c) in (
      ('a/bar/bar.config', 'blah blah blah'),
      ('a/foo/foo.config', 'meow meow meow'),
      ('b/foo/foo.config', 'meow meow meow'),
      ('b/bar/bar.config', 'blah blah blah'),
      ('c/bar/bar.config', 'blah meow blah')))
    dp1 = self.options_fingerprinter.fingerprint(dir_option, [d1])
    dp2 = self.options_fingerprinter.fingerprint(dir_option, [d1, d2])
    dp3 = self.options_fingerprinter.fingerprint(dir_option, [d2, d1])
    dp4 = self.options_fingerprinter.fingerprint(dir_option, [d3])

    # Recompute the fingerprints to check determinism; comparing a value to itself is vacuous.
    self.assertEquals(dp1, self.options_fingerprinter.fingerprint(dir_option, [d1]))
    self.assertEquals(dp2, self.options_fingerprinter.fingerprint(dir_option, [d1, d2]))
    self.assertNotEquals(dp1, dp3)
    self.assertNotEquals(dp1, dp4)
    self.assertNotEquals(dp2, dp3)
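
Summarizing the behaviors these tests pin down, a hedged standalone sketch (import paths are assumptions; build_graph must come from a real pants run or test harness):

from pants.option.custom_types import dict_option, list_option  # assumed import path
from pants.option.options_fingerprinter import OptionsFingerprinter  # assumed import path


def demo_fingerprints(build_graph):
  fingerprinter = OptionsFingerprinter(build_graph)
  # Dict fingerprints ignore key order...
  assert (fingerprinter.fingerprint(dict_option, {'b': 1, 'a': 2}) ==
          fingerprinter.fingerprint(dict_option, {'a': 2, 'b': 1}))
  # ...while list fingerprints are order-sensitive.
  assert (fingerprinter.fingerprint(list_option, [1, 2, 3]) !=
          fingerprinter.fingerprint(list_option, [1, 3, 2]))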