Example #1
0
class InvalidationCacheManager(object):
  """Manages cache checks, updates and invalidation keeping track of basic change
  and invalidation statistics.
  Note that this is distinct from the ArtifactCache concept, and should probably be renamed.
  """

  class CacheValidationError(Exception):
    """Indicates a problem accessing the cache."""

  def __init__(self,
               cache_key_generator,
               build_invalidator_dir,
               invalidate_dependents,
               fingerprint_strategy=None):
    self._cache_key_generator = cache_key_generator
    self._invalidate_dependents = invalidate_dependents
    self._invalidator = BuildInvalidator(build_invalidator_dir)
    self._fingerprint_strategy = fingerprint_strategy

  def update(self, vts):
    """Mark a changed or invalidated VersionedTargetSet as successfully processed."""
    for vt in vts.versioned_targets:
      self._invalidator.update(vt.cache_key)
      vt.valid = True
    self._invalidator.update(vts.cache_key)
    vts.valid = True

  def force_invalidate(self, vts):
    """Force invalidation of a VersionedTargetSet."""
    for vt in vts.versioned_targets:
      self._invalidator.force_invalidate(vt.cache_key)
      vt.valid = False
    self._invalidator.force_invalidate(vts.cache_key)
    vts.valid = False

  def check(self,
            targets,
            partition_size_hint=None,
            target_colors=None):
    """Checks whether each of the targets has changed and invalidates it if so.

    Returns a list of VersionedTargetSet objects (either valid or invalid). The returned sets
    'cover' the input targets, possibly partitioning them, and are in topological order.
    The caller can inspect these in order and, e.g., rebuild the invalid ones.

    If target_colors is specified, it must be a map from Target -> opaque 'color' values.
    Two Targets will be in the same partition only if they have the same color.
    """
    all_vts = self._sort_and_validate_targets(targets)
    invalid_vts = filter(lambda vt: not vt.valid, all_vts)
    return InvalidationCheck(all_vts, invalid_vts, partition_size_hint, target_colors)

  def _sort_and_validate_targets(self, targets):
    """Validate each target.

    Returns a topologically ordered set of VersionedTargets, each representing one input target.
    """
    # We must check the targets in this order, to ensure correctness if invalidate_dependents=True,
    # since we use earlier cache keys to compute later cache keys in this case.
    ordered_targets = self._order_target_list(targets)

    # This will be a list of VersionedTargets that correspond to @targets.
    versioned_targets = []

    # This will be a mapping from each target to its corresponding VersionedTarget.
    versioned_targets_by_target = {}

    # Map from id to current fingerprint of the target with that id. We update this as we iterate,
    # in topological order, so when handling a target, this will already contain all its deps (in
    # this round).
    id_to_hash = {}

    for target in ordered_targets:
      cache_key = self._key_for(target, transitive=self._invalidate_dependents)
      id_to_hash[target.id] = cache_key.hash

      # Create a VersionedTarget corresponding to @target.
      versioned_target = VersionedTarget(self, target, cache_key)

      # Add the new VersionedTarget to the list of computed VersionedTargets.
      versioned_targets.append(versioned_target)

    return versioned_targets

  def needs_update(self, cache_key):
    return self._invalidator.needs_update(cache_key)

  def _order_target_list(self, targets):
    """Orders the targets topologically, from least to most dependent."""
    return filter(targets.__contains__, reversed(sort_targets(targets)))

  def _key_for(self, target, transitive=False):
    try:
      return self._cache_key_generator.key_for_target(target,
                                                      transitive=transitive,
                                                      fingerprint_strategy=self._fingerprint_strategy)
    except Exception as e:
      # This is a catch-all for problems we haven't caught up with and given a better diagnostic.
      # TODO(Eric Ayers): If you see this exception, add a fix to catch the problem earlier.
      exc_info = sys.exc_info()
      new_exception = self.CacheValidationError("Problem validating target %s in %s: %s" %
                                                (target.id, target.address.spec_path, e))

      raise self.CacheValidationError, new_exception, exc_info[2]
Example #2
0
class InvalidationCacheManager(object):
    """Manages cache checks, updates and invalidation keeping track of basic change
  and invalidation statistics.
  Note that this is distinct from the ArtifactCache concept, and should probably be renamed.
  """
    class CacheValidationError(Exception):
        """Indicates a problem accessing the cache."""

    def __init__(self,
                 cache_key_generator,
                 build_invalidator_dir,
                 invalidate_dependents,
                 fingerprint_strategy=None,
                 invalidation_report=None,
                 task_name=None):
        self._cache_key_generator = cache_key_generator
        self._task_name = task_name or 'UNKNOWN'
        self._invalidate_dependents = invalidate_dependents
        self._invalidator = BuildInvalidator(build_invalidator_dir)
        self._fingerprint_strategy = fingerprint_strategy
        self.invalidation_report = invalidation_report

    def update(self, vts):
        """Mark a changed or invalidated VersionedTargetSet as successfully processed."""
        for vt in vts.versioned_targets:
            self._invalidator.update(vt.cache_key)
            vt.valid = True
        self._invalidator.update(vts.cache_key)
        vts.valid = True

    def force_invalidate(self, vts):
        """Force invalidation of a VersionedTargetSet."""
        for vt in vts.versioned_targets:
            self._invalidator.force_invalidate(vt.cache_key)
            vt.valid = False
        self._invalidator.force_invalidate(vts.cache_key)
        vts.valid = False

    def check(self,
              targets,
              partition_size_hint=None,
              target_colors=None,
              topological_order=False):
        """Checks whether each of the targets has changed and invalidates it if so.

    Returns a list of VersionedTargetSet objects (either valid or invalid). The returned sets
    'cover' the input targets, possibly partitioning them, with one caveat: if the FingerprintStrategy
    opted out of fingerprinting a target because it doesn't contribute to invalidation, then that
    target will be excluded from all_vts, invalid_vts, and the partitioned VTS.

    Callers can inspect these vts and rebuild the invalid ones, for example.

    If target_colors is specified, it must be a map from Target -> opaque 'color' values.
    Two Targets will be in the same partition only if they have the same color.
    """
        all_vts = self.wrap_targets(targets,
                                    topological_order=topological_order)
        invalid_vts = filter(lambda vt: not vt.valid, all_vts)
        return InvalidationCheck(all_vts, invalid_vts, partition_size_hint,
                                 target_colors)

    @property
    def task_name(self):
        return self._task_name

    def wrap_targets(self, targets, topological_order=False):
        """Wrap targets and their computed cache keys in VersionedTargets.

    If the FingerprintStrategy opted out of providing a fingerprint for a target, that target will not
    have an associated VersionedTarget returned.

    Returns a list of VersionedTargets, each representing one input target.
    """
        def vt_iter():
            if topological_order:
                sorted_targets = [
                    t for t in reversed(sort_targets(targets)) if t in targets
                ]
            else:
                sorted_targets = sorted(targets)
            for target in sorted_targets:
                target_key = self._key_for(target)
                if target_key is not None:
                    yield VersionedTarget(self, target, target_key)

        return list(vt_iter())

    def needs_update(self, cache_key):
        return self._invalidator.needs_update(cache_key)

    def _key_for(self, target):
        try:
            return self._cache_key_generator.key_for_target(
                target,
                transitive=self._invalidate_dependents,
                fingerprint_strategy=self._fingerprint_strategy)
        except Exception as e:
            # This is a catch-all for problems we haven't caught up with and given a better diagnostic.
            # TODO(Eric Ayers): If you see this exception, add a fix to catch the problem earlier.
            exc_info = sys.exc_info()
            new_exception = self.CacheValidationError(
                "Problem validating target {} in {}: {}".format(
                    target.id, target.address.spec_path, e))

            raise self.CacheValidationError, new_exception, exc_info[2]
Example #3
0
class PythonChroot(object):
  """Dumps python targets, their resources, requirements and resolved distributions into a
  chroot managed by a PEXBuilder.
  """

  # Maps each accepted dependency type to the bucket name used by resolve().
  _VALID_DEPENDENCIES = {
    PrepCommand: 'prep',
    PythonLibrary: 'libraries',
    PythonRequirementLibrary: 'reqs',
    PythonBinary: 'binaries',
    PythonThriftLibrary: 'thrifts',
    PythonAntlrLibrary: 'antlrs',
    PythonTests: 'tests'
  }

  # Class-level cache of generated thrift requirements, keyed by target and shared by all
  # PythonChroot instances.
  # NOTE(review): entries are keyed by target only, while the generated requirement's repository
  # lives under the per-interpreter egg cache root - confirm that sharing entries across
  # interpreters is intended.
  MEMOIZED_THRIFTS = {}

  class InvalidDependencyException(Exception):
    """Raised by resolve() for a target that is neither a known python type nor Dependencies."""

    def __init__(self, target):
      Exception.__init__(self, "Not a valid Python dependency! Found: %s" % target)

  def __init__(self,
               context,
               targets,
               extra_requirements=None,
               builder=None,
               platforms=None,
               interpreter=None):
    """
    :param context: Supplies `config` and `options` for cache locations and resolution.
    :param targets: The root targets to dump into the chroot.
    :param extra_requirements: Optional iterable of additional requirements to resolve.
    :param builder: Optional builder; defaults to a PEXBuilder over a fresh temp directory.
    :param platforms: Platforms forwarded to resolve_multi().
    :param interpreter: Optional interpreter; defaults to PythonInterpreter.get().
    """
    self.context = context
    self._targets = targets
    self._extra_requirements = list(extra_requirements) if extra_requirements else []
    self._platforms = platforms
    self._interpreter = interpreter or PythonInterpreter.get()
    self._builder = builder or PEXBuilder(os.path.realpath(tempfile.mkdtemp()),
                                          interpreter=self._interpreter)

    # Note: unrelated to the general pants artifact cache.
    self._egg_cache_root = os.path.join(
        PythonSetup(self.context.config).scratch_dir('artifact_cache', default_name='artifacts'),
        str(self._interpreter.identity))

    self._key_generator = CacheKeyGenerator()
    self._build_invalidator = BuildInvalidator(self._egg_cache_root)

  def delete(self):
    """Deletes this chroot from disk if it has been dumped."""
    safe_rmtree(self.path())

  def __del__(self):
    # __del__ also runs when __init__ raised part-way through; without a builder there is no
    # chroot path to remove or report.
    if getattr(self, '_builder', None) is None:
      return
    if os.getenv('PANTS_LEAVE_CHROOT') is None:
      self.delete()
    else:
      self.debug('Left chroot at %s' % self.path())

  @property
  def builder(self):
    """The builder this chroot dumps into."""
    return self._builder

  def debug(self, msg, indent=0):
    """Prints msg, indented by `indent` spaces, only when PANTS_VERBOSE is set."""
    if os.getenv('PANTS_VERBOSE') is not None:
      print('%s%s' % (' ' * indent, msg))

  def path(self):
    """Returns the real path of the builder's chroot directory."""
    return os.path.realpath(self._builder.path())

  def _dump_library(self, library):
    """Adds the library's sources and the sources of its resources targets to the builder.

    An OSError while adding a file is logged with the offending path and re-raised.
    """
    def copy_to_chroot(base, path, add_function):
      src = os.path.join(get_buildroot(), base, path)
      add_function(src, path)

    self.debug('  Dumping library: %s' % library)
    for relpath in library.sources_relative_to_source_root():
      try:
        copy_to_chroot(library.target_base, relpath, self._builder.add_source)
      except OSError:
        logger.error("Failed to copy {path} for library {library}"
                     .format(path=os.path.join(library.target_base, relpath),
                             library=library))
        raise

    for resources_tgt in library.resources:
      for resource_file_from_source_root in resources_tgt.sources_relative_to_source_root():
        try:
          copy_to_chroot(resources_tgt.target_base, resource_file_from_source_root,
                         self._builder.add_resource)
        except OSError:
          logger.error("Failed to copy {path} for resource {resource}"
                       .format(path=os.path.join(resources_tgt.target_base,
                                                 resource_file_from_source_root),
                               resource=resources_tgt.address.spec))
          raise

  def _dump_requirement(self, req):
    """Adds a single requirement to the builder."""
    self.debug('  Dumping requirement: %s' % req)
    self._builder.add_requirement(req)

  def _dump_distribution(self, dist):
    """Adds a single resolved distribution to the builder."""
    self.debug('  Dumping distribution: .../%s' % os.path.basename(dist.location))
    self._builder.add_distribution(dist)

  def _generate_requirement(self, library, builder_cls):
    """Returns a PythonRequirement for code generated from library by builder_cls.

    The sdist is rebuilt and copied into the library's cache directory only when the build
    invalidator reports that the library's cache key needs an update.
    """
    library_key = self._key_generator.key_for_target(library)
    builder = builder_cls(library, get_buildroot(),
                          self.context.options, '-' + library_key.hash[:8])

    cache_dir = os.path.join(self._egg_cache_root, library_key.id)
    if self._build_invalidator.needs_update(library_key):
      sdist = builder.build(interpreter=self._interpreter)
      safe_mkdir(cache_dir)
      shutil.copy(sdist, os.path.join(cache_dir, os.path.basename(sdist)))
      self._build_invalidator.update(library_key)

    return PythonRequirement(builder.requirement_string(), repository=cache_dir, use_2to3=True)

  def _generate_thrift_requirement(self, library):
    """Generates a requirement for a thrift library via PythonThriftBuilder."""
    return self._generate_requirement(library, PythonThriftBuilder)

  def _generate_antlr_requirement(self, library):
    """Generates a requirement for an antlr library via PythonAntlrBuilder."""
    return self._generate_requirement(library, PythonAntlrBuilder)

  def resolve(self, targets):
    """Walks targets and buckets every visited target by its dependency kind.

    Returns a defaultdict mapping the bucket names in _VALID_DEPENDENCIES to OrderedSets of
    targets. Dependencies targets are skipped.

    :raises InvalidDependencyException: for a visited target of any other type.
    """
    children = defaultdict(OrderedSet)

    def add_dep(trg):
      for target_type, target_key in self._VALID_DEPENDENCIES.items():
        if isinstance(trg, target_type):
          children[target_key].add(trg)
          return
      # Checked after the type table so the outcome never depends on dict iteration order.
      if isinstance(trg, Dependencies):
        return
      raise self.InvalidDependencyException(trg)

    for target in targets:
      target.walk(add_dep)
    return children

  def dump(self):
    """Dumps sources, requirements and resolved distributions into the builder.

    Returns the builder.
    """
    self.debug('Building chroot for %s:' % self._targets)
    targets = self.resolve(self._targets)

    for lib in targets['libraries'] | targets['binaries']:
      self._dump_library(lib)

    generated_reqs = OrderedSet()
    if targets['thrifts']:
      # The bucket is already an OrderedSet: iterate it directly so the generated requirements
      # keep a deterministic order.
      for thr in targets['thrifts']:
        if thr not in self.MEMOIZED_THRIFTS:
          self.MEMOIZED_THRIFTS[thr] = self._generate_thrift_requirement(thr)
        generated_reqs.add(self.MEMOIZED_THRIFTS[thr])

      generated_reqs.add(PythonRequirement('thrift', use_2to3=True))

    for antlr in targets['antlrs']:
      generated_reqs.add(self._generate_antlr_requirement(antlr))

    reqs_from_libraries = OrderedSet()
    for req_lib in targets['reqs']:
      for req in req_lib.payload.requirements:
        reqs_from_libraries.add(req)

    reqs_to_build = OrderedSet()
    find_links = []

    for req in reqs_from_libraries | generated_reqs | self._extra_requirements:
      if not req.should_build(self._interpreter.python, Platform.current()):
        self.debug('Skipping %s based upon version filter' % req)
        continue
      reqs_to_build.add(req)
      self._dump_requirement(req.requirement)
      if req.repository:
        find_links.append(req.repository)

    distributions = resolve_multi(
         self.context.config,
         reqs_to_build,
         interpreter=self._interpreter,
         platforms=self._platforms,
         ttl=self.context.options.for_global_scope().python_chroot_requirements_ttl,
         find_links=find_links)

    # Each distribution location is dumped once, however many platforms resolved to it.
    locations = set()
    for _, dist_set in distributions.items():
      for dist in dist_set:
        if dist.location not in locations:
          self._dump_distribution(dist)
          locations.add(dist.location)

    if len(targets['binaries']) > 1:
      print('WARNING: Target has multiple python_binary targets!', file=sys.stderr)

    return self._builder
Example #4
0
class PythonChroot(object):
    """Dumps a python target, its resources, requirements and resolved distributions into a
    chroot managed by a PEXBuilder.
    """

    # Maps each accepted dependency type to the bucket name used by resolve().
    _VALID_DEPENDENCIES = {
        PythonLibrary: 'libraries',
        PythonRequirementLibrary: 'reqs',
        PythonBinary: 'binaries',
        PythonThriftLibrary: 'thrifts',
        PythonAntlrLibrary: 'antlrs',
        PythonTests: 'tests'
    }

    # Class-level cache of generated thrift requirements, keyed by target and shared by all
    # PythonChroot instances.
    # NOTE(review): entries are keyed by target only, while the generated requirement's
    # repository lives under the per-interpreter egg cache root - confirm that sharing entries
    # across interpreters is intended.
    MEMOIZED_THRIFTS = {}

    class InvalidDependencyException(Exception):
        """Raised by resolve() for a target that is neither a known python type nor Dependencies."""

        def __init__(self, target):
            Exception.__init__(
                self, "Not a valid Python dependency! Found: %s" % target)

    def __init__(self,
                 target,
                 root_dir,
                 extra_targets=None,
                 extra_requirements=None,
                 builder=None,
                 platforms=None,
                 interpreter=None,
                 conn_timeout=None):
        """
        :param target: The root target to dump into the chroot.
        :param root_dir: Directory that target source bases are joined onto.
        :param extra_targets: Optional iterable of further targets resolved alongside `target`.
        :param extra_requirements: Optional iterable of additional requirements to resolve.
        :param builder: Optional builder; defaults to a PEXBuilder over a fresh temp directory.
        :param platforms: Platforms for resolution; a PythonBinary target's own platforms
          take precedence in dump().
        :param interpreter: Optional interpreter; defaults to PythonInterpreter.get().
        :param conn_timeout: Accepted but not used by this class.
        """
        self._config = Config.load()
        self._target = target
        self._root = root_dir
        self._platforms = platforms
        self._interpreter = interpreter or PythonInterpreter.get()
        self._extra_targets = list(
            extra_targets) if extra_targets is not None else []
        self._extra_requirements = list(
            extra_requirements) if extra_requirements is not None else []
        self._builder = builder or PEXBuilder(tempfile.mkdtemp(),
                                              interpreter=self._interpreter)

        # Note: unrelated to the general pants artifact cache.
        self._egg_cache_root = os.path.join(
            PythonSetup(self._config).scratch_dir('artifact_cache',
                                                  default_name='artifacts'),
            str(self._interpreter.identity))

        self._key_generator = CacheKeyGenerator()
        self._build_invalidator = BuildInvalidator(self._egg_cache_root)

    def __del__(self):
        # __del__ also runs when __init__ raised part-way through; without a builder there is
        # no chroot path to remove or report.
        if getattr(self, '_builder', None) is None:
            return
        if os.getenv('PANTS_LEAVE_CHROOT') is None:
            safe_rmtree(self.path())
        else:
            self.debug('Left chroot at %s' % self.path())

    @property
    def builder(self):
        """The builder this chroot dumps into."""
        return self._builder

    def debug(self, msg, indent=0):
        """Prints msg, indented by `indent` spaces, only when PANTS_VERBOSE is set."""
        if os.getenv('PANTS_VERBOSE') is not None:
            print('%s%s' % (' ' * indent, msg))

    def path(self):
        """Returns the builder's chroot path."""
        return self._builder.path()

    def _dump_library(self, library):
        """Adds the library's sources and the sources of its resources targets to the builder."""
        def copy_to_chroot(base, path, add_function):
            src = os.path.join(self._root, base, path)
            add_function(src, path)

        self.debug('  Dumping library: %s' % library)
        for relpath in library.sources_relative_to_source_root():
            copy_to_chroot(library.target_base, relpath,
                           self._builder.add_source)

        for resources_tgt in library.resources:
            for resource_relpath in resources_tgt.sources_relative_to_source_root():
                copy_to_chroot(resources_tgt.target_base,
                               resource_relpath,
                               self._builder.add_resource)

    def _dump_requirement(self, req, dynamic, repo):
        """Adds a single requirement, with its dynamic flag and repository, to the builder."""
        self.debug('  Dumping requirement: %s%s%s' %
                   (str(req), ' (dynamic)' if dynamic else '',
                    ' (repo: %s)' % repo if repo else ''))
        self._builder.add_requirement(req, dynamic, repo)

    def _dump_distribution(self, dist):
        """Adds a single resolved distribution to the builder."""
        self.debug('  Dumping distribution: .../%s' %
                   os.path.basename(dist.location))
        self._builder.add_distribution(dist)

    def _generate_requirement(self, library, builder_cls):
        """Returns a PythonRequirement for code generated from library by builder_cls.

        The sdist is rebuilt and copied into the library's cache directory only when the build
        invalidator reports that the library's cache key needs an update.
        """
        library_key = self._key_generator.key_for_target(library)
        builder = builder_cls(library, self._root, self._config,
                              '-' + library_key.hash[:8])

        cache_dir = os.path.join(self._egg_cache_root, library_key.id)
        if self._build_invalidator.needs_update(library_key):
            sdist = builder.build(interpreter=self._interpreter)
            safe_mkdir(cache_dir)
            shutil.copy(sdist, os.path.join(cache_dir,
                                            os.path.basename(sdist)))
            self._build_invalidator.update(library_key)

        return PythonRequirement(builder.requirement_string(),
                                 repository=cache_dir,
                                 use_2to3=True)

    def _generate_thrift_requirement(self, library):
        """Generates a requirement for a thrift library via PythonThriftBuilder."""
        return self._generate_requirement(library, PythonThriftBuilder)

    def _generate_antlr_requirement(self, library):
        """Generates a requirement for an antlr library via PythonAntlrBuilder."""
        return self._generate_requirement(library, PythonAntlrBuilder)

    def resolve(self, targets):
        """Walks targets and buckets every visited target by its dependency kind.

        Returns a defaultdict mapping the bucket names in _VALID_DEPENDENCIES to OrderedSets of
        targets. Dependencies targets are skipped.

        :raises InvalidDependencyException: for a visited target of any other type.
        """
        children = defaultdict(OrderedSet)

        def add_dep(trg):
            for target_type, target_key in self._VALID_DEPENDENCIES.items():
                if isinstance(trg, target_type):
                    children[target_key].add(trg)
                    return
            # Checked after the type table so the outcome never depends on dict iteration order.
            if isinstance(trg, Dependencies):
                return
            raise self.InvalidDependencyException(trg)

        for target in targets:
            target.walk(add_dep)
        return children

    def dump(self):
        """Dumps sources, requirements and resolved distributions into the builder.

        Returns the builder.
        """
        self.debug('Building PythonBinary %s:' % self._target)
        targets = self.resolve([self._target] + self._extra_targets)

        for lib in targets['libraries'] | targets['binaries']:
            self._dump_library(lib)

        generated_reqs = OrderedSet()
        if targets['thrifts']:
            # The bucket is already an OrderedSet: iterate it directly so the generated
            # requirements keep a deterministic order.
            for thr in targets['thrifts']:
                if thr not in self.MEMOIZED_THRIFTS:
                    self.MEMOIZED_THRIFTS[
                        thr] = self._generate_thrift_requirement(thr)
                generated_reqs.add(self.MEMOIZED_THRIFTS[thr])

            generated_reqs.add(PythonRequirement('thrift', use_2to3=True))

        for antlr in targets['antlrs']:
            generated_reqs.add(self._generate_antlr_requirement(antlr))

        reqs_from_libraries = OrderedSet()
        for req_lib in targets['reqs']:
            for req in req_lib.payload.requirements:
                reqs_from_libraries.add(req)

        reqs_to_build = OrderedSet()
        for req in reqs_from_libraries | generated_reqs | self._extra_requirements:
            if not req.should_build(self._interpreter.python,
                                    Platform.current()):
                self.debug('Skipping %s based upon version filter' % req)
                continue
            reqs_to_build.add(req)
            self._dump_requirement(req._requirement, False, req._repository)

        # A PythonBinary target's own platforms override those given at construction.
        platforms = self._platforms
        if isinstance(self._target, PythonBinary):
            platforms = self._target.platforms
        distributions = resolve_multi(self._config,
                                      reqs_to_build,
                                      interpreter=self._interpreter,
                                      platforms=platforms)

        # Each distribution location is dumped once, however many platforms resolved to it.
        locations = set()
        for _, dist_set in distributions.items():
            for dist in dist_set:
                if dist.location not in locations:
                    self._dump_distribution(dist)
                    locations.add(dist.location)

        if len(targets['binaries']) > 1:
            print('WARNING: Target has multiple python_binary targets!',
                  file=sys.stderr)

        return self._builder
Example #5
0
class InvalidationCacheManager(object):
  """Manages cache checks, updates and invalidation keeping track of basic change
  and invalidation statistics.
  Note that this is distinct from the ArtifactCache concept, and should probably be renamed.
  """

  class CacheValidationError(Exception):
    """Indicates a problem accessing the cache."""

  def __init__(self,
               cache_key_generator,
               build_invalidator_dir,
               invalidate_dependents,
               fingerprint_strategy=None):
    self._cache_key_generator = cache_key_generator
    self._invalidate_dependents = invalidate_dependents
    self._invalidator = BuildInvalidator(build_invalidator_dir)
    self._fingerprint_strategy = fingerprint_strategy

  def update(self, vts):
    """Mark a changed or invalidated VersionedTargetSet as successfully processed."""
    for vt in vts.versioned_targets:
      self._invalidator.update(vt.cache_key)
      vt.valid = True
    self._invalidator.update(vts.cache_key)
    vts.valid = True

  def force_invalidate(self, vts):
    """Force invalidation of a VersionedTargetSet."""
    for vt in vts.versioned_targets:
      self._invalidator.force_invalidate(vt.cache_key)
      vt.valid = False
    self._invalidator.force_invalidate(vts.cache_key)
    vts.valid = False

  def check(self,
            targets,
            partition_size_hint=None,
            target_colors=None,
            topological_order=False):
    """Checks whether each of the targets has changed and invalidates it if so.

    Returns a list of VersionedTargetSet objects (either valid or invalid). The returned sets
    'cover' the input targets, possibly partitioning them, with one caveat: if the FingerprintStrategy
    opted out of fingerprinting a target because it doesn't contribute to invalidation, then that
    target will be excluded from all_vts, invalid_vts, and the partitioned VTS.

    Callers can inspect these vts and rebuild the invalid ones, for example.

    If target_colors is specified, it must be a map from Target -> opaque 'color' values.
    Two Targets will be in the same partition only if they have the same color.
    """
    all_vts = self.wrap_targets(targets, topological_order=topological_order)
    invalid_vts = filter(lambda vt: not vt.valid, all_vts)
    return InvalidationCheck(all_vts, invalid_vts, partition_size_hint, target_colors)

  def wrap_targets(self, targets, topological_order=False):
    """Wrap targets and their computed cache keys in VersionedTargets.

    If the FingerprintStrategy opted out of providing a fingerprint for a target, that target will not
    have an associated VersionedTarget returned.

    Returns a list of VersionedTargets, each representing one input target.
    """
    def vt_iter():
      if topological_order:
        sorted_targets = [t for t in reversed(sort_targets(targets)) if t in targets]
      else:
        sorted_targets = sorted(targets)
      for target in sorted_targets:
        target_key = self._key_for(target)
        if target_key is not None:
          yield VersionedTarget(self, target, target_key)
    return list(vt_iter())

  def needs_update(self, cache_key):
    return self._invalidator.needs_update(cache_key)

  def _key_for(self, target):
    try:
      return self._cache_key_generator.key_for_target(target,
                                                      transitive=self._invalidate_dependents,
                                                      fingerprint_strategy=self._fingerprint_strategy)
    except Exception as e:
      # This is a catch-all for problems we haven't caught up with and given a better diagnostic.
      # TODO(Eric Ayers): If you see this exception, add a fix to catch the problem earlier.
      exc_info = sys.exc_info()
      new_exception = self.CacheValidationError("Problem validating target {} in {}: {}"
                                                .format(target.id, target.address.spec_path, e))

      raise self.CacheValidationError, new_exception, exc_info[2]
Example #6
0
class InvalidationCacheManager(object):
  """Manages cache checks, updates and invalidation keeping track of basic change
  and invalidation statistics.
  Note that this is distinct from the ArtifactCache concept, and should probably be renamed.
  """

  class CacheValidationError(Exception):
    """Indicates a problem accessing the cache."""

  def __init__(self,
               cache_key_generator,
               build_invalidator_dir,
               invalidate_dependents,
               extra_data,
               fingerprint_strategy=None):
    self._cache_key_generator = cache_key_generator
    self._invalidate_dependents = invalidate_dependents
    self._extra_data = pickle.dumps(extra_data)  # extra_data may be None.
    self._invalidator = BuildInvalidator(build_invalidator_dir)
    self._fingerprint_strategy = fingerprint_strategy

  def update(self, vts):
    """Mark a changed or invalidated VersionedTargetSet as successfully processed."""
    for vt in vts.versioned_targets:
      self._invalidator.update(vt.cache_key)
      vt.valid = True
    self._invalidator.update(vts.cache_key)
    vts.valid = True

  def force_invalidate(self, vts):
    """Force invalidation of a VersionedTargetSet."""
    for vt in vts.versioned_targets:
      self._invalidator.force_invalidate(vt.cache_key)
      vt.valid = False
    self._invalidator.force_invalidate(vts.cache_key)
    vts.valid = False

  def check(self,
            targets,
            partition_size_hint=None,
            target_colors=None):
    """Checks whether each of the targets has changed and invalidates it if so.

    Returns a list of VersionedTargetSet objects (either valid or invalid). The returned sets
    'cover' the input targets, possibly partitioning them, and are in topological order.
    The caller can inspect these in order and, e.g., rebuild the invalid ones.

    If target_colors is specified, it must be a map from Target -> opaque 'color' values.
    Two Targets will be in the same partition only if they have the same color.
    """
    all_vts = self._sort_and_validate_targets(targets)
    invalid_vts = filter(lambda vt: not vt.valid, all_vts)
    return InvalidationCheck(all_vts, invalid_vts, partition_size_hint, target_colors)

  def _sort_and_validate_targets(self, targets):
    """Validate each target.

    Returns a topologically ordered set of VersionedTargets, each representing one input target.
    """
    # We must check the targets in this order, to ensure correctness if invalidate_dependents=True,
    # since we use earlier cache keys to compute later cache keys in this case.
    ordered_targets = self._order_target_list(targets)

    # This will be a list of VersionedTargets that correspond to @targets.
    versioned_targets = []

    # This will be a mapping from each target to its corresponding VersionedTarget.
    versioned_targets_by_target = {}

    # Map from id to current fingerprint of the target with that id. We update this as we iterate,
    # in topological order, so when handling a target, this will already contain all its deps (in
    # this round).
    id_to_hash = {}

    for target in ordered_targets:
      cache_key = self._key_for(target, transitive=self._invalidate_dependents)
      id_to_hash[target.id] = cache_key.hash

      # Create a VersionedTarget corresponding to @target.
      versioned_target = VersionedTarget(self, target, cache_key)

      # Add the new VersionedTarget to the list of computed VersionedTargets.
      versioned_targets.append(versioned_target)

    return versioned_targets

  def needs_update(self, cache_key):
    return self._invalidator.needs_update(cache_key)

  def _order_target_list(self, targets):
    """Orders the targets topologically, from least to most dependent."""
    return filter(targets.__contains__, reversed(sort_targets(targets)))

  def _key_for(self, target, transitive=False):
    try:
      return self._cache_key_generator.key_for_target(target,
                                                      transitive=transitive,
                                                      fingerprint_strategy=self._fingerprint_strategy)
    except Exception as e:
      # This is a catch-all for problems we haven't caught up with and given a better diagnostic.
      # TODO(Eric Ayers): If you see this exception, add a fix to catch the problem earlier.
      exc_info = sys.exc_info()
      new_exception = self.CacheValidationError("Problem validating target %s in %s: %s" %
                                                (target.id, target.address.spec_path, e))

      raise self.CacheValidationError, new_exception, exc_info[2]
Example #7
0
class PythonChroot(object):
    """Builds a PEX chroot out of a set of Python targets.

    Library and binary sources are copied into the chroot through the supplied builder,
    thrift and antlr targets are turned into generated requirements, and every requirement
    is resolved to distributions for each requested platform.
    """

    # Supported dependency target types, mapped to the bucket `resolve` files them under.
    _VALID_DEPENDENCIES = {
        PrepCommand: 'prep',
        PythonLibrary: 'libraries',
        PythonRequirementLibrary: 'reqs',
        PythonBinary: 'binaries',
        PythonThriftLibrary: 'thrifts',
        PythonAntlrLibrary: 'antlrs',
        PythonTests: 'tests'
    }

    # Thrift target -> generated requirement. This is a class attribute, so the cache is
    # shared by every PythonChroot instance in the process.
    # NOTE(review): entries are keyed on the target only, while the generated requirement's
    # repository path includes the interpreter identity -- confirm that reuse across
    # interpreters is intended.
    MEMOIZED_THRIFTS = {}

    class InvalidDependencyException(Exception):
        """Raised by `resolve` for a target that is not a supported Python dependency."""

        def __init__(self, target):
            Exception.__init__(
                self,
                "Not a valid Python dependency! Found: {}".format(target))

    @staticmethod
    def get_platforms(platform_list):
        """Return the unique platforms in `platform_list` as a tuple.

        The literal 'current' is replaced by `Platform.current()`. De-duplication goes
        through a set, so the order of the returned tuple is unspecified.
        """
        return tuple({
            Platform.current() if p == 'current' else p
            for p in platform_list
        })

    def __init__(self,
                 python_setup,
                 python_repos,
                 ivy_bootstrapper,
                 thrift_binary_factory,
                 interpreter,
                 builder,
                 targets,
                 platforms,
                 extra_requirements=None):
        """
        :param python_setup: Supplies `artifact_cache_dir`, `platforms`, `resolver_cache_dir`
          and `resolver_cache_ttl`.
        :param python_repos: Supplies `get_fetchers()` and `get_network_context()`.
        :param ivy_bootstrapper: Handed to the antlr builder.
        :param thrift_binary_factory: Handed to the thrift builder.
        :param interpreter: The interpreter the chroot is built for.
        :param builder: The builder that sources, requirements and distributions are added to.
        :param targets: The targets to dump into the chroot.
        :param platforms: Platforms to resolve for; when falsy, `python_setup.platforms` is
          used instead.
        :param extra_requirements: Optional iterable of additional requirements.
        """
        self._python_setup = python_setup
        self._python_repos = python_repos
        self._ivy_bootstrapper = ivy_bootstrapper
        self._thrift_binary_factory = thrift_binary_factory

        self._interpreter = interpreter
        self._builder = builder
        self._targets = targets
        self._platforms = platforms
        self._extra_requirements = list(
            extra_requirements) if extra_requirements else []

        # Note: unrelated to the general pants artifact cache.
        self._artifact_cache_root = os.path.join(
            self._python_setup.artifact_cache_dir,
            str(self._interpreter.identity))
        self._key_generator = CacheKeyGenerator()
        self._build_invalidator = BuildInvalidator(self._artifact_cache_root)

    def delete(self):
        """Deletes this chroot from disk if it has been dumped."""
        safe_rmtree(self.path())

    def debug(self, msg, indent=0):
        """Print `msg` indented by `indent` spaces, only when PANTS_VERBOSE is set."""
        if os.getenv('PANTS_VERBOSE') is not None:
            print('{}{}'.format(' ' * indent, msg))

    def path(self):
        """Return the real (symlink-resolved) path of the builder's chroot."""
        return os.path.realpath(self._builder.path())

    def pex(self):
        """Return a PEX for the chroot path, bound to this chroot's interpreter."""
        return PEX(self.path(), interpreter=self._interpreter)

    def package_pex(self, filename):
        """Package into a PEX zipfile.

        :param filename: The filename where the PEX should be stored.
        """
        self._builder.build(filename)

    def _dump_library(self, library):
        """Add the sources of `library`, and the files of its resources, to the builder.

        :raises OSError: Re-raised after logging if a file cannot be added.
        """
        def copy_to_chroot(base, path, add_function):
            src = os.path.join(get_buildroot(), base, path)
            add_function(src, path)

        self.debug('  Dumping library: {}'.format(library))
        for relpath in library.sources_relative_to_source_root():
            try:
                copy_to_chroot(library.target_base, relpath,
                               self._builder.add_source)
            except OSError:
                logger.error(
                    "Failed to copy {path} for library {library}".format(
                        path=os.path.join(library.target_base, relpath),
                        library=library))
                raise

        for resources_tgt in library.resources:
            for resource_file_from_source_root in resources_tgt.sources_relative_to_source_root(
            ):
                try:
                    copy_to_chroot(resources_tgt.target_base,
                                   resource_file_from_source_root,
                                   self._builder.add_resource)
                except OSError:
                    logger.error(
                        "Failed to copy {path} for resource {resource}".format(
                            path=os.path.join(resources_tgt.target_base,
                                              resource_file_from_source_root),
                            resource=resources_tgt.address.spec))
                    raise

    def _dump_requirement(self, req):
        """Register the requirement `req` with the builder."""
        self.debug('  Dumping requirement: {}'.format(req))
        self._builder.add_requirement(req)

    def _dump_distribution(self, dist):
        """Add the resolved distribution `dist` to the builder."""
        self.debug('  Dumping distribution: .../{}'.format(
            os.path.basename(dist.location)))
        self._builder.add_distribution(dist)

    def _generate_requirement(self, library, builder_cls):
        """Build (or reuse) an sdist for a code-generating target.

        The sdist is rebuilt only when the build invalidator reports the target's cache key
        as needing an update; the result is copied into a per-key cache directory.

        :param library: The target to generate code for.
        :param builder_cls: Callable creating the code generator; invoked with `target`,
          `root_dir` and `target_suffix` keyword arguments.
        :returns: A `PythonRequirement` whose repository is the cache directory.
        """
        library_key = self._key_generator.key_for_target(library)
        builder = builder_cls(target=library,
                              root_dir=get_buildroot(),
                              target_suffix='-' + library_key.hash[:8])

        cache_dir = os.path.join(self._artifact_cache_root, library_key.id)
        if self._build_invalidator.needs_update(library_key):
            sdist = builder.build(interpreter=self._interpreter)
            safe_mkdir(cache_dir)
            shutil.copy(sdist, os.path.join(cache_dir,
                                            os.path.basename(sdist)))
            self._build_invalidator.update(library_key)

        return PythonRequirement(builder.requirement_string(),
                                 repository=cache_dir,
                                 use_2to3=True)

    def _generate_thrift_requirement(self, library):
        """Generate a requirement for a thrift target, using a temp workdir in the chroot."""
        thrift_builder = functools.partial(
            PythonThriftBuilder,
            thrift_binary_factory=self._thrift_binary_factory,
            workdir=safe_mkdtemp(dir=self.path(), prefix='thrift.'))
        return self._generate_requirement(library, thrift_builder)

    def _generate_antlr_requirement(self, library):
        """Generate a requirement for an antlr target, using a temp workdir in the chroot."""
        antlr_builder = functools.partial(
            PythonAntlrBuilder,
            ivy_bootstrapper=self._ivy_bootstrapper,
            workdir=safe_mkdtemp(dir=self.path(), prefix='antlr.'))
        return self._generate_requirement(library, antlr_builder)

    def resolve(self, targets):
        """Walk `targets` and bucket every reachable target by dependency kind.

        :returns: A defaultdict mapping the bucket names in `_VALID_DEPENDENCIES` to an
          OrderedSet of targets.
        :raises InvalidDependencyException: If a walked target is neither a supported type
          nor a `Dependencies` target.
        """
        children = defaultdict(OrderedSet)

        def add_dep(trg):
            # Currently we handle all of our code generation, so we don't want to operate over any
            # synthetic targets injected upstream.
            # TODO(John Sirois): Revisit this when building a proper python product pipeline.
            if trg.is_synthetic:
                return

            for target_type, target_key in self._VALID_DEPENDENCIES.items():
                if isinstance(trg, target_type):
                    children[target_key].add(trg)
                    return

            # `Dependencies` targets are tolerated but not recorded. The check lives outside
            # the loop so the outcome does not depend on dict iteration order.
            if isinstance(trg, Dependencies):
                return
            raise self.InvalidDependencyException(trg)

        for target in targets:
            target.walk(add_dep)
        return children

    def dump(self):
        """Populate the chroot with sources, requirements and resolved distributions.

        :returns: The builder that was populated.
        """
        self.debug('Building chroot for {}:'.format(self._targets))
        targets = self.resolve(self._targets)

        for lib in targets['libraries'] | targets['binaries']:
            self._dump_library(lib)

        generated_reqs = OrderedSet()
        if targets['thrifts']:
            # The bucket is already an OrderedSet of unique targets; iterating it directly
            # keeps the order of the generated requirements deterministic.
            for thr in targets['thrifts']:
                if thr not in self.MEMOIZED_THRIFTS:
                    self.MEMOIZED_THRIFTS[
                        thr] = self._generate_thrift_requirement(thr)
                generated_reqs.add(self.MEMOIZED_THRIFTS[thr])

            # Generated thrift code additionally needs the thrift runtime itself.
            generated_reqs.add(PythonRequirement('thrift', use_2to3=True))

        for antlr in targets['antlrs']:
            generated_reqs.add(self._generate_antlr_requirement(antlr))

        reqs_from_libraries = OrderedSet()
        for req_lib in targets['reqs']:
            for req in req_lib.payload.requirements:
                reqs_from_libraries.add(req)

        reqs_to_build = OrderedSet()
        find_links = OrderedSet()

        for req in reqs_from_libraries | generated_reqs | self._extra_requirements:
            if not req.should_build(self._interpreter.python,
                                    Platform.current()):
                self.debug('Skipping {} based upon version filter'.format(req))
                continue
            reqs_to_build.add(req)
            self._dump_requirement(req.requirement)
            if req.repository:
                find_links.add(req.repository)

        distributions = self._resolve_multi(reqs_to_build, find_links)

        # The same distribution may be resolved for several platforms; dump each location once.
        locations = set()
        for dist_set in distributions.values():
            for dist in dist_set:
                if dist.location not in locations:
                    self._dump_distribution(dist)
                locations.add(dist.location)

        if len(targets['binaries']) > 1:
            print('WARNING: Target has multiple python_binary targets!',
                  file=sys.stderr)

        return self._builder

    def _resolve_multi(self, requirements, find_links):
        """Multi-platform dependency resolution for PEX files.

        Given a pants configuration and a set of requirements, resolve the distributions
        that must be included in order to satisfy them.  That may involve distributions for
        multiple platforms.

        :param requirements: A list of :class:`PythonRequirement` objects to resolve.
        :param find_links: Additional paths to search for source packages during resolution.
        :returns: A dict mapping each platform to the result of `resolve` for that platform.
        """
        distributions = dict()
        platforms = self.get_platforms(self._platforms
                                       or self._python_setup.platforms)
        fetchers = self._python_repos.get_fetchers()
        fetchers.extend(Fetcher([path]) for path in find_links)
        context = self._python_repos.get_network_context()

        # The requirement list is the same for every platform, so build it once.
        resolvable_requirements = [req.requirement for req in requirements]

        for platform in platforms:
            distributions[platform] = resolve(
                requirements=resolvable_requirements,
                interpreter=self._interpreter,
                fetchers=fetchers,
                platform=platform,
                context=context,
                cache=self._python_setup.resolver_cache_dir,
                cache_ttl=self._python_setup.resolver_cache_ttl)

        return distributions
Example #8
0
class PythonChroot(object):
  """Builds a PEX chroot out of a set of Python targets.

  Library and binary sources are copied into the chroot through the supplied builder,
  thrift and antlr targets are turned into generated requirements, and every requirement
  is resolved to distributions for each requested platform.
  """

  # Supported dependency target types, mapped to the bucket `resolve` files them under.
  _VALID_DEPENDENCIES = {
    PrepCommand: 'prep',
    PythonLibrary: 'libraries',
    PythonRequirementLibrary: 'reqs',
    PythonBinary: 'binaries',
    PythonThriftLibrary: 'thrifts',
    PythonAntlrLibrary: 'antlrs',
    PythonTests: 'tests'
  }

  class InvalidDependencyException(Exception):
    """Raised by `resolve` for a target that is not a supported Python dependency."""

    def __init__(self, target):
      Exception.__init__(self, "Not a valid Python dependency! Found: {}".format(target))

  @staticmethod
  def get_platforms(platform_list):
    """Return the unique platforms in `platform_list` as a tuple.

    The literal 'current' is replaced by `Platform.current()`. De-duplication goes through
    a set, so the order of the returned tuple is unspecified.
    """
    return tuple({Platform.current() if p == 'current' else p for p in platform_list})

  def __init__(self,
               python_setup,
               python_repos,
               ivy_bootstrapper,
               thrift_binary_factory,
               interpreter,
               builder,
               targets,
               platforms,
               extra_requirements=None,
               log=None):
    """
    :param python_setup: Supplies `artifact_cache_dir`, `platforms`, `resolver_cache_dir`
      and `resolver_cache_ttl`.
    :param python_repos: Supplies `get_fetchers()` and `get_network_context()`.
    :param ivy_bootstrapper: Handed to the antlr builder.
    :param thrift_binary_factory: Handed to the thrift builder.
    :param interpreter: The interpreter the chroot is built for.
    :param builder: The builder that sources, requirements and distributions are added to.
    :param targets: The targets to dump into the chroot.
    :param platforms: Platforms to resolve for; when falsy, `python_setup.platforms` is
      used instead.
    :param extra_requirements: Optional iterable of additional requirements.
    :param log: Optional logger used for debug and error messages; the module-level
      `logger` is used when omitted.
    """
    self._python_setup = python_setup
    self._python_repos = python_repos
    self._ivy_bootstrapper = ivy_bootstrapper
    self._thrift_binary_factory = thrift_binary_factory

    self._interpreter = interpreter
    self._builder = builder
    self._targets = targets
    self._platforms = platforms
    self._extra_requirements = list(extra_requirements) if extra_requirements else []
    self._logger = log or logger

    # Note: unrelated to the general pants artifact cache.
    self._artifact_cache_root = os.path.join(
      self._python_setup.artifact_cache_dir, str(self._interpreter.identity))
    self._key_generator = CacheKeyGenerator()
    self._build_invalidator = BuildInvalidator(self._artifact_cache_root)

  def delete(self):
    """Deletes this chroot from disk if it has been dumped."""
    safe_rmtree(self.path())

  def debug(self, msg):
    """Emit `msg` at debug level on this chroot's logger."""
    self._logger.debug(msg)

  def path(self):
    """Return the real (symlink-resolved) path of the builder's chroot."""
    return os.path.realpath(self._builder.path())

  def pex(self):
    """Return a PEX for the chroot path, bound to this chroot's interpreter."""
    return PEX(self.path(), interpreter=self._interpreter)

  def package_pex(self, filename):
    """Package into a PEX zipfile.

    :param filename: The filename where the PEX should be stored.
    """
    self._builder.build(filename)

  def _dump_library(self, library):
    """Add the sources of `library`, and the files of its resources, to the builder.

    :raises OSError: Re-raised after logging if a file cannot be added.
    """
    def copy_to_chroot(base, path, add_function):
      src = os.path.join(get_buildroot(), base, path)
      add_function(src, path)

    self.debug('  Dumping library: {}'.format(library))
    for relpath in library.sources_relative_to_source_root():
      try:
        copy_to_chroot(library.target_base, relpath, self._builder.add_source)
      except OSError:
        # Report through the injected logger, like `debug`, so that a caller-supplied `log`
        # sees failures as well.
        self._logger.error("Failed to copy {path} for library {library}"
                           .format(path=os.path.join(library.target_base, relpath),
                                   library=library))
        raise

    for resources_tgt in library.resources:
      for resource_file_from_source_root in resources_tgt.sources_relative_to_source_root():
        try:
          copy_to_chroot(resources_tgt.target_base, resource_file_from_source_root,
                         self._builder.add_resource)
        except OSError:
          self._logger.error("Failed to copy {path} for resource {resource}"
                             .format(path=os.path.join(resources_tgt.target_base,
                                                       resource_file_from_source_root),
                                     resource=resources_tgt.address.spec))
          raise

  def _dump_requirement(self, req):
    """Register the requirement `req` with the builder."""
    self.debug('  Dumping requirement: {}'.format(req))
    self._builder.add_requirement(req)

  def _dump_distribution(self, dist):
    """Add the resolved distribution `dist` to the builder."""
    self.debug('  Dumping distribution: .../{}'.format(os.path.basename(dist.location)))
    self._builder.add_distribution(dist)

  def _generate_requirement(self, library, builder_cls):
    """Build (or reuse) an sdist for a code-generating target.

    The sdist is rebuilt only when the build invalidator reports the target's cache key as
    needing an update; the result is copied into a per-key cache directory.

    :param library: The target to generate code for.
    :param builder_cls: Callable creating the code generator; invoked with `target`,
      `root_dir` and `target_suffix` keyword arguments.
    :returns: A `PythonRequirement` whose repository is the cache directory.
    """
    library_key = self._key_generator.key_for_target(library)
    builder = builder_cls(target=library,
                          root_dir=get_buildroot(),
                          target_suffix='-' + library_key.hash[:8])

    cache_dir = os.path.join(self._artifact_cache_root, library_key.id)
    if self._build_invalidator.needs_update(library_key):
      sdist = builder.build(interpreter=self._interpreter)
      safe_mkdir(cache_dir)
      shutil.copy(sdist, os.path.join(cache_dir, os.path.basename(sdist)))
      self._build_invalidator.update(library_key)

    return PythonRequirement(builder.requirement_string(), repository=cache_dir, use_2to3=True)

  def _generate_thrift_requirement(self, library):
    """Generate a requirement for a thrift target, using a temp workdir in the chroot."""
    thrift_builder = functools.partial(PythonThriftBuilder,
                                       thrift_binary_factory=self._thrift_binary_factory,
                                       workdir=safe_mkdtemp(dir=self.path(), prefix='thrift.'))
    return self._generate_requirement(library, thrift_builder)

  def _generate_antlr_requirement(self, library):
    """Generate a requirement for an antlr target, using a temp workdir in the chroot."""
    antlr_builder = functools.partial(PythonAntlrBuilder,
                                      ivy_bootstrapper=self._ivy_bootstrapper,
                                      workdir=safe_mkdtemp(dir=self.path(), prefix='antlr.'))
    return self._generate_requirement(library, antlr_builder)

  def resolve(self, targets):
    """Walk `targets` and bucket every reachable target by dependency kind.

    :returns: A defaultdict mapping the bucket names in `_VALID_DEPENDENCIES` to an
      OrderedSet of targets.
    :raises InvalidDependencyException: If a walked target is neither a supported type nor
      a `Dependencies` target.
    """
    children = defaultdict(OrderedSet)

    def add_dep(trg):
      # Currently we handle all of our code generation, so we don't want to operate over any
      # synthetic targets injected upstream.
      # TODO(John Sirois): Revisit this when building a proper python product pipeline.
      if trg.is_synthetic:
        return

      for target_type, target_key in self._VALID_DEPENDENCIES.items():
        if isinstance(trg, target_type):
          children[target_key].add(trg)
          return

      # `Dependencies` targets are tolerated but not recorded. The check lives outside the
      # loop so the outcome does not depend on dict iteration order.
      if isinstance(trg, Dependencies):
        return
      raise self.InvalidDependencyException(trg)

    for target in targets:
      target.walk(add_dep)
    return children

  def dump(self):
    """Populate the chroot with sources, requirements and resolved distributions.

    :returns: The builder that was populated.
    """
    self.debug('Building chroot for {}:'.format(self._targets))
    targets = self.resolve(self._targets)

    for lib in targets['libraries'] | targets['binaries']:
      self._dump_library(lib)

    generated_reqs = OrderedSet()
    if targets['thrifts']:
      for thr in targets['thrifts']:
        generated_reqs.add(self._generate_thrift_requirement(thr))
      # Generated thrift code additionally needs the thrift runtime itself.
      generated_reqs.add(PythonRequirement('thrift', use_2to3=True))

    for antlr in targets['antlrs']:
      generated_reqs.add(self._generate_antlr_requirement(antlr))

    reqs_from_libraries = OrderedSet()
    for req_lib in targets['reqs']:
      for req in req_lib.payload.requirements:
        reqs_from_libraries.add(req)

    reqs_to_build = OrderedSet()
    find_links = OrderedSet()

    for req in reqs_from_libraries | generated_reqs | self._extra_requirements:
      if not req.should_build(self._interpreter.python, Platform.current()):
        self.debug('Skipping {} based upon version filter'.format(req))
        continue
      reqs_to_build.add(req)
      self._dump_requirement(req.requirement)
      if req.repository:
        find_links.add(req.repository)

    distributions = self._resolve_multi(reqs_to_build, find_links)

    # The same distribution may be resolved for several platforms; dump each location once.
    locations = set()
    for dist_set in distributions.values():
      for dist in dist_set:
        if dist.location not in locations:
          self._dump_distribution(dist)
        locations.add(dist.location)

    if len(targets['binaries']) > 1:
      print('WARNING: Target has multiple python_binary targets!', file=sys.stderr)

    return self._builder

  def _resolve_multi(self, requirements, find_links):
    """Multi-platform dependency resolution for PEX files.

    Given a pants configuration and a set of requirements, resolve the distributions that
    must be included in order to satisfy them.  That may involve distributions for
    multiple platforms.

    :param requirements: A list of :class:`PythonRequirement` objects to resolve.
    :param find_links: Additional paths to search for source packages during resolution.
    :returns: A dict mapping each platform to the result of `resolve` for that platform.
    """
    distributions = dict()
    platforms = self.get_platforms(self._platforms or self._python_setup.platforms)
    fetchers = self._python_repos.get_fetchers()
    fetchers.extend(Fetcher([path]) for path in find_links)
    context = self._python_repos.get_network_context()

    # Neither the cache directory nor the requirement list varies by platform, so compute
    # both once instead of on every loop iteration.
    requirements_cache_dir = os.path.join(self._python_setup.resolver_cache_dir,
                                          str(self._interpreter.identity))
    resolvable_requirements = [req.requirement for req in requirements]

    for platform in platforms:
      distributions[platform] = resolve(
        requirements=resolvable_requirements,
        interpreter=self._interpreter,
        fetchers=fetchers,
        platform=platform,
        context=context,
        cache=requirements_cache_dir,
        cache_ttl=self._python_setup.resolver_cache_ttl)

    return distributions
Example #9
0
class CacheManager(object):
  """Manages cache checks, updates and invalidation keeping track of basic change
  and invalidation statistics.
  Note that this is distinct from the ArtifactCache concept, and should probably be renamed.
  """
  def __init__(self, cache_key_generator, build_invalidator_dir,
               invalidate_dependents, extra_data, only_externaldeps):
    """
    :param cache_key_generator: Object whose `key_for_target` computes cache keys.
    :param build_invalidator_dir: Directory handed to the `BuildInvalidator`.
    :param invalidate_dependents: If true, a target's key also covers the fingerprints of
      its immediate dependencies.
    :param extra_data: Extra data, pickled and mixed into every cache key.
    :param only_externaldeps: If true, keys are computed with `NO_SOURCES` instead of
      `TARGET_SOURCES`.
    """
    self._cache_key_generator = cache_key_generator
    self._invalidate_dependents = invalidate_dependents
    self._extra_data = pickle.dumps(extra_data)  # extra_data may be None.
    self._sources = NO_SOURCES if only_externaldeps else TARGET_SOURCES

    self._invalidator = BuildInvalidator(build_invalidator_dir)

  def update(self, vts):
    """Mark a changed or invalidated VersionedTargetSet as successfully processed."""
    for vt in vts.versioned_targets:
      self._invalidator.update(vt.cache_key)
      vt.valid = True
    self._invalidator.update(vts.cache_key)
    vts.valid = True

  def force_invalidate(self, vts):
    """Force invalidation of a VersionedTargetSet."""
    for vt in vts.versioned_targets:
      self._invalidator.force_invalidate(vt.cache_key)
      vt.valid = False
    self._invalidator.force_invalidate(vts.cache_key)
    vts.valid = False

  def check(self, targets, partition_size_hint=None):
    """Checks whether each of the targets has changed and invalidates it if so.

    Returns a list of VersionedTargetSet objects (either valid or invalid). The returned sets
    'cover' the input targets, possibly partitioning them, and are in topological order.
    The caller can inspect these in order and, e.g., rebuild the invalid ones.
    """
    all_vts = self._sort_and_validate_targets(targets)
    # Build a real list: on Python 3 filter() would return a one-shot iterator.
    invalid_vts = [vt for vt in all_vts if not vt.valid]
    return InvalidationCheck(all_vts, invalid_vts, partition_size_hint)

  def _dependency_keys(self, target, id_to_hash):
    """Collect the fingerprints of the immediate deps of `target` that affect its cache key.

    :param target: The target whose dependencies are inspected.
    :param id_to_hash: Map from target id to the fingerprint computed so far in this round.
    :returns: A set of dependency fingerprints; empty when dependents are not invalidated
      or the target has no `dependencies` attribute.
    :raises ValueError: If a dependency is of a type no cache key can be derived from.
    """
    dependency_keys = set()
    if not (self._invalidate_dependents and hasattr(target, 'dependencies')):
      return dependency_keys

    # Note that we only need to do this for the immediate deps, because those will already
    # reflect changes in their own deps.
    for dep in target.dependencies:
      # We rely on the fact that any deps have already been processed, either in an earlier
      # round or because they came first in ordered_targets.
      # Note that only external deps (e.g., JarDependency) or targets with sources can
      # affect invalidation. Other targets (JarLibrary, Pants) are just dependency scaffolding.
      if isinstance(dep, ExternalDependency):
        dependency_keys.add(dep.cache_key())
      elif isinstance(dep, TargetWithSources):
        fprint = id_to_hash.get(dep.id)
        if fprint is None:
          # It may have been processed in a prior round, and therefore the fprint should
          # have been written out by the invalidator.
          fprint = self._invalidator.existing_hash(dep.id)
          # Note that fprint may still be None here. E.g., a codegen target is in the list
          # of deps, but its fprint is not visible to our self._invalidator (that of the
          # target synthesized from it is visible, so invalidation will still be correct.)
          #
          # Another case where this can happen is a dep of a codegen target on, say,
          # a java target that hasn't been built yet (again, the synthesized target will
          # depend on that same java target, so invalidation will still be correct.)
          # TODO(benjy): Make this simpler and more obviously correct.
        if fprint is not None:
          dependency_keys.add(fprint)
      elif isinstance(dep, (JarLibrary, Pants)):
        pass
      else:
        raise ValueError('Cannot calculate a cache_key for a dependency: %s' % dep)
    return dependency_keys

  def _sort_and_validate_targets(self, targets):
    """Validate each target.

    Returns a topologically ordered set of VersionedTargets, each representing one input target.
    """
    # We must check the targets in this order, to ensure correctness if invalidate_dependents=True,
    # since we use earlier cache keys to compute later cache keys in this case.
    ordered_targets = self._order_target_list(targets)

    # This will be a list of VersionedTargets that correspond to @targets.
    versioned_targets = []

    # This will be a mapping from each target to its corresponding VersionedTarget.
    versioned_targets_by_target = {}

    # Map from id to current fingerprint of the target with that id. We update this as we iterate,
    # in topological order, so when handling a target, this will already contain all its deps (in
    # this round).
    id_to_hash = {}

    for target in ordered_targets:
      dependency_keys = self._dependency_keys(target, id_to_hash)
      cache_key = self._key_for(target, dependency_keys)
      id_to_hash[target.id] = cache_key.hash

      # Create a VersionedTarget corresponding to @target.
      versioned_target = VersionedTarget(self, target, cache_key)

      # Add the new VersionedTarget to the list of computed VersionedTargets.
      versioned_targets.append(versioned_target)

      # Add to the mapping from Targets to VersionedTargets, for use in hooking up VersionedTarget
      # dependencies below.
      versioned_targets_by_target[target] = versioned_target

    # Having created all applicable VersionedTargets, now we build the VersionedTarget dependency
    # graph, looking through targets that don't correspond to VersionedTargets themselves.
    versioned_target_deps_by_target = {}

    def get_versioned_target_deps_for_target(target):
      # For every dependency of @target, we will store its corresponding VersionedTarget here. For
      # dependencies that don't correspond to a VersionedTarget (e.g. pass-through dependency
      # wrappers), we will resolve their actual dependencies and find VersionedTargets for them.
      versioned_target_deps = set()
      if hasattr(target, 'dependencies'):
        for dep in target.dependencies:
          for dependency in dep.resolve():
            if dependency in versioned_targets_by_target:
              # If there exists a VersionedTarget corresponding to this Target, store it and
              # continue.
              versioned_target_deps.add(versioned_targets_by_target[dependency])
            elif dependency in versioned_target_deps_by_target:
              # Otherwise, see if we've already resolved this dependency to the VersionedTargets it
              # depends on, and use those.
              versioned_target_deps.update(versioned_target_deps_by_target[dependency])
            else:
              # Otherwise, compute the VersionedTargets that correspond to this dependency's
              # dependencies, cache and use the computed result.
              versioned_target_deps_by_target[dependency] = get_versioned_target_deps_for_target(
                  dependency)
              versioned_target_deps.update(versioned_target_deps_by_target[dependency])

      # Return the VersionedTarget dependencies that this target's VersionedTarget should depend on.
      return versioned_target_deps

    # Initialize all VersionedTargets to point to the VersionedTargets they depend on.
    for versioned_target in versioned_targets:
      versioned_target.dependencies = get_versioned_target_deps_for_target(versioned_target.target)

    return versioned_targets

  def needs_update(self, cache_key):
    """Return whether the invalidator reports `cache_key` as needing an update."""
    return self._invalidator.needs_update(cache_key)

  def _order_target_list(self, targets):
    """Orders the targets topologically, from least to most dependent."""
    targets = set(t for t in targets if isinstance(t, Target))
    # A list (rather than filter()) so the result is reusable on Python 3 as well.
    return [t for t in reversed(InternalTarget.sort_targets(targets)) if t in targets]

  def _key_for(self, target, dependency_keys):
    """Compute the cache key of `target`, mixing in the extra data and `dependency_keys`."""
    def fingerprint_extra(sha):
      sha.update(self._extra_data)
      for key in sorted(dependency_keys):  # Sort to ensure hashing in a consistent order.
        sha.update(key)

    return self._cache_key_generator.key_for_target(
      target,
      sources=self._sources,
      fingerprint_extra=fingerprint_extra
    )
Example #10
0
class PythonChroot(object):
  _VALID_DEPENDENCIES = {
    PythonLibrary: 'libraries',
    PythonRequirement: 'reqs',
    PythonBinary: 'binaries',
    PythonThriftLibrary: 'thrifts',
    PythonAntlrLibrary: 'antlrs',
    PythonTests: 'tests'
  }

  MEMOIZED_THRIFTS = {}

  class InvalidDependencyException(Exception):
    """Raised for a target that is not one of the supported Python dependency types."""

    def __init__(self, target):
      message = "Not a valid Python dependency! Found: %s" % target
      Exception.__init__(self, message)

  def __init__(self,
               target,
               root_dir,
               extra_targets=None,
               builder=None,
               platforms=None,
               interpreter=None,
               conn_timeout=None):
    """Records the build inputs and prepares the builder and the egg cache bookkeeping.

    When no interpreter is given, PythonInterpreter.get() is used; when no builder is given, a
    PEXBuilder over a fresh temporary directory is created. extra_targets is copied into a list.
    conn_timeout is accepted but is not used anywhere in this constructor.
    """
    self._config = Config.load()
    self._target = target
    self._root = root_dir
    self._platforms = platforms

    if not interpreter:
      interpreter = PythonInterpreter.get()
    self._interpreter = interpreter

    if extra_targets is None:
      self._extra_targets = []
    else:
      self._extra_targets = list(extra_targets)

    if not builder:
      builder = PEXBuilder(tempfile.mkdtemp(), interpreter=self._interpreter)
    self._builder = builder

    # Note: unrelated to the general pants artifact cache.
    scratch_dir = PythonSetup(self._config).scratch_dir('artifact_cache', default_name='artifacts')
    self._egg_cache_root = os.path.join(scratch_dir, str(self._interpreter.identity))

    self._key_generator = CacheKeyGenerator()
    self._build_invalidator = BuildInvalidator(self._egg_cache_root)


  def __del__(self):
    """Removes the chroot directory on finalization unless PANTS_LEAVE_CHROOT is set.

    When PANTS_LEAVE_CHROOT is present in the environment the chroot is kept and its location is
    reported through debug() instead.
    """
    # __del__ also runs for instances whose __init__ raised before the builder was assigned; in
    # that case there is no chroot to clean up, and touching self.path() would only raise an
    # AttributeError from inside the finalizer.
    if getattr(self, '_builder', None) is None:
      return

    chroot_path = self.path()
    if os.getenv('PANTS_LEAVE_CHROOT') is None:
      safe_rmtree(chroot_path)
    else:
      self.debug('Left chroot at %s' % chroot_path)

  @property
  def builder(self):
    """The builder object this chroot dumps sources, requirements and distributions into."""
    chroot_builder = self._builder
    return chroot_builder

  def debug(self, msg, indent=0):
    """Prints msg prefixed by `indent` spaces, but only if PANTS_VERBOSE is set in the env."""
    if os.getenv('PANTS_VERBOSE') is None:
      return
    padding = ' ' * indent
    print('%s%s' % (padding, msg))

  def path(self):
    """Returns the path reported by the underlying builder."""
    chroot_builder = self._builder
    return chroot_builder.path()

  def _dump_library(self, library):
    """Adds every source and resource file of library to the builder.

    Each file is read from <root>/<library.target_base>/<file> and registered with the builder
    under its own relative name.
    """
    self.debug('  Dumping library: %s' % library)
    for source in library.sources:
      on_disk = os.path.join(self._root, library.target_base, source)
      self._builder.add_source(on_disk, source)
    for resource in library.resources:
      on_disk = os.path.join(self._root, library.target_base, resource)
      self._builder.add_resource(on_disk, resource)

  def _dump_requirement(self, req, dynamic, repo):
    """Logs the requirement (noting dynamic/repo when given) and adds it to the builder."""
    dynamic_note = ''
    if dynamic:
      dynamic_note = ' (dynamic)'

    repo_note = ''
    if repo:
      repo_note = ' (repo: %s)' % repo

    self.debug('  Dumping requirement: %s%s%s' % (str(req), dynamic_note, repo_note))
    self._builder.add_requirement(req, dynamic, repo)

  def _dump_distribution(self, dist):
    """Logs the base name of the distribution's location and adds it to the builder."""
    dist_name = os.path.basename(dist.location)
    self.debug('  Dumping distribution: .../%s' % dist_name)
    self._builder.add_distribution(dist)

  def _generate_requirement(self, library, builder_cls):
    """Returns a PythonRequirement for library, building its sdist first if the cache is stale.

    builder_cls is instantiated with the library, the root dir, the config and a suffix derived
    from the first 8 characters of the library's cache key hash. The build output is copied into
    a per-key directory under the egg cache root, which the requirement uses as its repository.
    """
    cache_key = self._key_generator.key_for_target(library)
    hash_suffix = '-' + cache_key.hash[:8]
    sdist_builder = builder_cls(library, self._root, self._config, hash_suffix)

    egg_dir = os.path.join(self._egg_cache_root, cache_key.id)
    if self._build_invalidator.needs_update(cache_key):
      built_sdist = sdist_builder.build(interpreter=self._interpreter)
      safe_mkdir(egg_dir)
      destination = os.path.join(egg_dir, os.path.basename(built_sdist))
      shutil.copy(built_sdist, destination)
      # Only mark the key as up to date once the artifact is safely in the cache directory.
      self._build_invalidator.update(cache_key)

    with ParseContext.temp():
      requirement = PythonRequirement(sdist_builder.requirement_string(),
                                      repository=egg_dir,
                                      use_2to3=True)
    return requirement

  def _generate_thrift_requirement(self, library):
    """Generates the requirement for library using PythonThriftBuilder."""
    thrift_builder_cls = PythonThriftBuilder
    return self._generate_requirement(library, thrift_builder_cls)

  def _generate_antlr_requirement(self, library):
    """Generates the requirement for library using PythonAntlrBuilder."""
    antlr_builder_cls = PythonAntlrBuilder
    return self._generate_requirement(library, antlr_builder_cls)

  def resolve(self, targets):
    """Walks the given targets and groups everything visited by dependency kind.

    Returns a defaultdict mapping the names in _VALID_DEPENDENCIES ('libraries', 'reqs', ...) to
    an OrderedSet of the visited targets of that kind. Raises InvalidDependencyException for a
    visited target that matches none of the supported types.
    """
    grouped = defaultdict(OrderedSet)

    def classify(candidate):
      for dep_type, bucket in self._VALID_DEPENDENCIES.items():
        if isinstance(candidate, dep_type):
          grouped[bucket].add(candidate)
          break
      else:
        # No supported type matched.
        raise self.InvalidDependencyException(candidate)

    for root in targets:
      root.walk(classify)
    return grouped

  def dump(self):
    """Populates the builder with everything the target needs and returns the builder.

    In order: dumps the sources/resources of all libraries and binaries, generates requirements
    for thrift and antlr libraries, adds every requirement that passes its version filter, and
    finally adds each distinct distribution those requirements resolve to. A warning is printed
    to stderr when more than one python_binary target was resolved.
    """
    self.debug('Building PythonBinary %s:' % self._target)

    targets = self.resolve([self._target] + self._extra_targets)

    for lib in targets['libraries'] | targets['binaries']:
      self._dump_library(lib)

    generated_reqs = OrderedSet()
    if targets['thrifts']:
      # Iterate the resolved collection directly: it is already de-duplicated, and wrapping it in
      # a plain set() would make the order of the generated requirements arbitrary.
      for thr in targets['thrifts']:
        if thr not in self.MEMOIZED_THRIFTS:
          self.MEMOIZED_THRIFTS[thr] = self._generate_thrift_requirement(thr)
        generated_reqs.add(self.MEMOIZED_THRIFTS[thr])
      with ParseContext.temp():
        # trick pants into letting us add this python requirement, otherwise we get
        # TargetDefinitionException: Error in target BUILD.temp:thrift: duplicate to
        # PythonRequirement(thrift)
        #
        # TODO(wickman) Instead of just blindly adding a PythonRequirement for thrift, we
        # should first detect if any explicit thrift requirements have been added and use
        # those.  Only if they have not been supplied should we auto-inject it.
        generated_reqs.add(PythonRequirement('thrift', use_2to3=True,
            name='thrift-' + ''.join(random.sample('0123456789abcdef' * 8, 8))))

    for antlr in targets['antlrs']:
      generated_reqs.add(self._generate_antlr_requirement(antlr))

    targets['reqs'] |= generated_reqs
    reqs_to_build = OrderedSet()
    for req in targets['reqs']:
      if not req.should_build(self._interpreter.python, Platform.current()):
        self.debug('Skipping %s based upon version filter' % req)
        continue
      reqs_to_build.add(req)
      self._dump_requirement(req._requirement, False, req._repository)

    # A PythonBinary target's own platforms take precedence over the ones given at construction.
    platforms = self._platforms
    if isinstance(self._target, PythonBinary):
      platforms = self._target.platforms
    distributions = resolve_multi(
         self._config,
         reqs_to_build,
         interpreter=self._interpreter,
         platforms=platforms)

    # The same distribution may show up for several platforms; dump each location only once.
    seen_locations = set()
    for dist_set in distributions.values():
      for dist in dist_set:
        if dist.location in seen_locations:
          continue
        seen_locations.add(dist.location)
        self._dump_distribution(dist)

    if len(targets['binaries']) > 1:
      print('WARNING: Target has multiple python_binary targets!', file=sys.stderr)

    return self._builder