Example #1
    def _calculate_classpath(self, targets):
        jars = OrderedDict()
        excludes = set()

        # Support the ivy force concept when we sanely can for internal dep conflicts.
        # TODO(John Sirois): Consider supporting / implementing the configured ivy revision picking
        # strategy generally.
        def add_jar(jar):
            coordinate = (jar.org, jar.name)
            existing = jars.get(coordinate)
            jars[coordinate] = jar if not existing else (
                self._resolve_conflict(existing=existing, proposed=jar))

        def collect_jars(target):
            if target.is_jvm or target.is_jar_library:
                for jar in target.jar_dependencies:
                    if jar.rev:
                        add_jar(jar)

            # Lift jvm target-level excludes up to the global excludes set
            if target.is_jvm and target.payload.excludes:
                excludes.update(target.payload.excludes)

        for target in targets:
            target.walk(collect_jars)

        return jars.values(), excludes
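
The dedup above keys each jar by its (org, name) coordinate, so repeated dependencies collapse into one entry while first-seen order is kept, and coordinate clashes are delegated to self._resolve_conflict. A minimal standalone sketch of the same pattern, with a hypothetical pick_higher_rev standing in for the real resolver:

  from collections import OrderedDict, namedtuple

  Jar = namedtuple('Jar', ['org', 'name', 'rev'])

  def pick_higher_rev(existing, proposed):
    # Hypothetical resolver: keep the candidate with the (lexicographically) higher rev.
    return proposed if proposed.rev > existing.rev else existing

  def dedup_jars(jars):
    by_coordinate = OrderedDict()
    for jar in jars:
      coordinate = (jar.org, jar.name)
      existing = by_coordinate.get(coordinate)
      by_coordinate[coordinate] = jar if existing is None else pick_higher_rev(existing, jar)
    return list(by_coordinate.values())

  jars = [Jar('org.a', 'x', '1.0'), Jar('org.a', 'x', '2.0'), Jar('org.b', 'y', '1.0')]
  print(dedup_jars(jars))  # org.a:x resolved to rev 2.0, still listed before org.b:y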
Example #2
 def __init__(self,
              checkpoint_root,
              verbose=True,
              task_killer=TaskKiller,
              executor_detector=ExecutorDetector,
              task_garbage_collector=TaskGarbageCollector,
              clock=time):
     ExecutorBase.__init__(self)
     ExceptionalThread.__init__(self)
     self.daemon = True
     self._stop_event = threading.Event()
     # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
     # which they were received via a launchTask.
     self._gc_task_queue = OrderedDict()
     # cache the ExecutorDriver provided by the slave, so we can use it out
     # of band from slave-initiated callbacks.  This should be supplied by
     # ExecutorBase.registered() when the executor first registers with the
     # slave.
     self._driver = None
     self._slave_id = None  # cache the slave ID provided by the slave
     self._task_id = None  # the task_id currently being executed by the ThermosGCExecutor, if any
     self._start_time = None  # the start time of a task currently being executed, if any
     self._detector = executor_detector()
     self._collector = task_garbage_collector(root=checkpoint_root)
     self._clock = clock
     self._task_killer = task_killer
     self._checkpoint_root = checkpoint_root
     self._dropped_tasks = AtomicGauge('dropped_tasks')
     self.metrics.register(self._dropped_tasks)
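
Because _gc_task_queue is an OrderedDict, queued GC requests can be replaced in place by task_id yet are still drained oldest-first. The consuming side is not shown above, so the following is only an illustrative sketch of that queue discipline:

  from collections import OrderedDict

  queue = OrderedDict()
  queue['task-1'] = ('task_info_1', 'retained_tasks_1')
  queue['task-2'] = ('task_info_2', 'retained_tasks_2')

  # A newer message for an already-queued task_id replaces the payload but keeps its slot.
  queue['task-1'] = ('task_info_1b', 'retained_tasks_1b')

  # popitem(last=False) pops from the front, i.e. in launchTask arrival order.
  while queue:
    task_id, payload = queue.popitem(last=False)
    print(task_id, payload)  # task-1 first, then task-2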
Example #3
  def _calculate_classpath(self, targets):
    jars = OrderedDict()
    excludes = set()

    # Support the ivy force concept when we sanely can for internal dep conflicts.
    # TODO(John Sirois): Consider supporting / implementing the configured ivy revision picking
    # strategy generally.
    def add_jar(jar):
      coordinate = (jar.org, jar.name)
      existing = jars.get(coordinate)
      jars[coordinate] = jar if not existing else (
        self._resolve_conflict(existing=existing, proposed=jar)
      )

    def collect_jars(target):
      if target.is_jvm or target.is_jar_library:
        for jar in target.jar_dependencies:
          if jar.rev:
            add_jar(jar)

      # Lift jvm target-level excludes up to the global excludes set
      if target.is_jvm and target.payload.excludes:
        excludes.update(target.payload.excludes)

    for target in targets:
      target.walk(collect_jars)

    return jars.values(), excludes
Example #4
    def write(self, target, path, confs=None):
        def as_jar(internal_target):
            jar, _, _, _ = self.get_db(internal_target).as_jar_with_version(
                internal_target)
            return jar

        # TODO(John Sirois): a dict is used here to de-dup codegen targets which have both the original
        # codegen target - say java_thrift_library - and the synthetic generated target (java_library)
        # Consider reworking codegen tasks to add removal of the original codegen targets when rewriting
        # the graph
        dependencies = OrderedDict()
        internal_codegen = {}
        configurations = set()
        for dep in target_internal_dependencies(target):
            jar = as_jar(dep)
            dependencies[(jar.org, jar.name)] = self.internaldep(jar, dep)
            if dep.is_codegen:
                internal_codegen[jar.name] = jar.name
        for jar in target.jar_dependencies:
            if jar.rev:
                dependencies[(jar.org, jar.name)] = self.jardep(jar)
                configurations |= set(jar._configurations)

        target_jar = self.internaldep(
            as_jar(target), configurations=list(configurations)).extend(
                dependencies=dependencies.values())

        template_kwargs = self.templateargs(target_jar, confs)
        with safe_open(path, 'w') as output:
            template = pkgutil.get_data(__name__, self.template_relpath)
            Generator(template, **template_kwargs).write(output)
Example #5
  def write(self, target, path, confs=None, extra_confs=None):
    # TODO(John Sirois): a dict is used here to de-dup codegen targets which have both the original
    # codegen target - say java_thrift_library - and the synthetic generated target (java_library)
    # Consider reworking codegen tasks to add removal of the original codegen targets when rewriting
    # the graph
    dependencies = OrderedDict()
    internal_codegen = {}
    configurations = set(confs or [])
    for dep in target_internal_dependencies(target):
      jar = self._as_versioned_jar(dep)
      dependencies[(jar.org, jar.name)] = self.internaldep(jar, dep)
      if dep.is_codegen:
        internal_codegen[jar.name] = jar.name
    for jar in target.jar_dependencies:
      if jar.rev:
        dependencies[(jar.org, jar.name)] = self.jardep(jar)
        configurations |= set(jar._configurations)

    target_jar = self.internaldep(self._as_versioned_jar(target),
                                  configurations=list(configurations))
    target_jar = target_jar.extend(dependencies=dependencies.values())

    template_kwargs = self.templateargs(target_jar, confs, extra_confs)
    with safe_open(path, 'w') as output:
      template = pkgutil.get_data(self.template_package_name, self.template_relpath)
      Generator(template, **template_kwargs).write(output)
Example #6
  def write(self, target, path, confs=None):
    def as_jar(internal_target):
      jar, _, _, _ = self.get_db(internal_target).as_jar_with_version(internal_target)
      return jar

    # TODO(John Sirois): a dict is used here to de-dup codegen targets which have both the original
    # codegen target - say java_thrift_library - and the synthetic generated target (java_library)
    # Consider reworking codegen tasks to add removal of the original codegen targets when rewriting
    # the graph
    dependencies = OrderedDict()
    internal_codegen = {}
    for dep in target_internal_dependencies(target):
      jar = as_jar(dep)
      dependencies[(jar.org, jar.name)] = self.internaldep(jar, dep)
      if dep.is_codegen:
        internal_codegen[jar.name] = jar.name
    for jar in target.jar_dependencies:
      if jar.rev:
        dependencies[(jar.org, jar.name)] = self.jardep(jar)
    target_jar = self.internaldep(as_jar(target)).extend(dependencies=dependencies.values())

    template_kwargs = self.templateargs(target_jar, confs)
    with safe_open(path, 'w') as output:
      template = pkgutil.get_data(__name__, self.template_relpath)
      Generator(template, **template_kwargs).write(output)
Example #7
  def execution_order(phases):
    """
      Yields goals in execution order for the given phases.  Does not account for goals run
      multiple times due to grouping.
    """
    dependencies_by_goal = OrderedDict()
    def populate_dependencies(phases):
      for phase in phases:
        for goal in phase.goals():
          if goal not in dependencies_by_goal:
            populate_dependencies(goal.dependencies)
            deps = OrderedSet()
            for phasedep in goal.dependencies:
              deps.update(phasedep.goals())
            dependencies_by_goal[goal] = deps
    populate_dependencies(phases)

    while dependencies_by_goal:
      for goal, deps in dependencies_by_goal.items():
        if not deps:
          dependencies_by_goal.pop(goal)
          for _, deps in dependencies_by_goal.items():
            if goal in deps:
              deps.discard(goal)
          yield goal
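
execution_order repeatedly emits a goal whose dependency set has emptied and erases it from every other goal's set: a Kahn-style topological sort over an insertion-ordered mapping. A self-contained sketch of the same loop using plain strings and sets:

  from collections import OrderedDict

  def execution_order(dependencies):
    # dependencies: OrderedDict of node -> set of nodes it depends on.
    remaining = OrderedDict((node, set(deps)) for node, deps in dependencies.items())
    while remaining:
      ready = [node for node, deps in remaining.items() if not deps]
      if not ready:
        raise ValueError('dependency cycle detected')
      for node in ready:
        del remaining[node]
        for deps in remaining.values():
          deps.discard(node)
        yield node

  graph = OrderedDict([('gen', set()), ('compile', {'gen'}), ('test', {'compile'})])
  print(list(execution_order(graph)))  # ['gen', 'compile', 'test']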
Example #8
  def compile(self, classpath, sources, output_dir, analysis_cache, upstream_analysis_caches, depfile):
    # To pass options to scalac simply prefix with -S.
    args = ['-S' + x for x in self._scalac_args]

    def analysis_cache_full_path(analysis_cache_product):
      # We expect the argument to be { analysis_cache_dir, [ analysis_cache_file ]}.
      if len(analysis_cache_product) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      analysis_cache_dir, analysis_cache_files = analysis_cache_product.iteritems().next()
      if len(analysis_cache_files) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      return os.path.join(analysis_cache_dir, analysis_cache_files[0])

    # Strings of <output dir>:<full path to analysis cache file for the classes in that dir>.
    analysis_map = \
      OrderedDict([ (k, analysis_cache_full_path(v)) for k, v in upstream_analysis_caches.itermappings() ])

    if len(analysis_map) > 0:
      args.extend([ '-analysis-map', ','.join(['%s:%s' % kv for kv in analysis_map.items()]) ])

    args.extend([
      '-analysis-cache', analysis_cache,
      '-classpath', ':'.join(self._zinc_classpath + classpath),
      '-output-products', depfile,
      '-mirror-analysis',
      '-d', output_dir
    ])
    args.extend(sources)
    return self.run_zinc(args)
Example #9
 def reset(self):
   """Clear out the state of the BuildGraph, in particular Target mappings and dependencies."""
   self._addresses_already_closed = set()
   self._target_by_address = OrderedDict()
   self._target_dependencies_by_address = defaultdict(OrderedSet)
   self._target_dependees_by_address = defaultdict(set)
   self._derived_from_by_derivative_address = {}
Example #10
  def identify_zinc_jars(zinc_classpath):
    """Find the named jars in the zinc classpath.

    TODO: Make these mappings explicit instead of deriving them by jar name heuristics.
    """
    ret = OrderedDict()
    ret.update(ZincUtils.identify_jars(ZincUtils.zinc_jar_names, zinc_classpath))
    return ret
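
identify_jars itself is not shown; the docstring suggests it pairs each expected jar name with the classpath entry whose file name matches it, and the OrderedDict keeps the resulting flag order stable. A rough sketch under that assumption:

  import os
  from collections import OrderedDict

  def identify_jars(names, classpath):
    ret = OrderedDict()
    for name in names:
      for jar_path in classpath:
        # Name heuristic: the jar's basename starts with the expected name.
        if os.path.basename(jar_path).startswith(name):
          ret[name] = jar_path
          break
    return ret

  classpath = ['/repo/zinc-0.1.0.jar', '/repo/compiler-interface-0.12.jar']
  print(identify_jars(['zinc', 'compiler-interface'], classpath))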
Example #11
  def identify_zinc_jars(zinc_classpath):
    """Find the named jars in the zinc classpath.

    TODO: Make these mappings explicit instead of deriving them by jar name heuristics.
    """
    ret = OrderedDict()
    ret.update(ZincUtils.identify_jars(ZincUtils.ZINC_JAR_NAMES, zinc_classpath))
    return ret
Example #12
  def identify_zinc_jars(zinc_classpath):
    """Find the named jars in the zinc classpath.

    TODO: When profiles migrate to regular pants jar() deps instead of ivy.xml files we can
          make these mappings explicit instead of deriving them by jar name heuristics.
    """
    ret = OrderedDict()
    ret.update(ZincUtils.identify_jars(ZincUtils.zinc_jar_names, zinc_classpath))
    return ret
Example #13
    def compile(self, classpath, sources, output_dir, analysis_cache,
                upstream_analysis_caches, depfile):
        compiler_classpath = nailgun_profile_classpath(self,
                                                       self._compile_profile)

        # To pass options to scalac simply prefix with -S.
        args = ['-S' + x for x in self._args]

        def analysis_cache_full_path(analysis_cache_product):
            # We expect the argument to be { analysis_cache_dir, [ analysis_cache_file ]}.
            if len(analysis_cache_product) != 1:
                raise TaskError(
                    'There can only be one analysis cache file per output directory'
                )
            analysis_cache_dir, analysis_cache_files = analysis_cache_product.iteritems(
            ).next()
            if len(analysis_cache_files) != 1:
                raise TaskError(
                    'There can only be one analysis cache file per output directory'
                )
            return os.path.join(analysis_cache_dir, analysis_cache_files[0])

        # Strings of <output dir>:<full path to analysis cache file for the classes in that dir>.
        analysis_map = \
          OrderedDict([ (k, analysis_cache_full_path(v)) for k, v in upstream_analysis_caches.itermappings() ])

        if len(analysis_map) > 0:
            args.extend([
                '-analysis-map',
                ','.join(['%s:%s' % kv for kv in analysis_map.items()])
            ])

        zinc_classpath = nailgun_profile_classpath(self, self._zinc_profile)
        zinc_jars = ScalaCompile.identify_zinc_jars(compiler_classpath,
                                                    zinc_classpath)
        for (name, jarpath) in zinc_jars.items(
        ):  # The zinc jar names are also the flag names.
            args.extend(['-%s' % name, jarpath])

        args.extend([
            '-analysis-cache', analysis_cache, '-log-level',
            self.context.options.log_level or 'info', '-classpath',
            ':'.join(zinc_classpath + classpath), '-output-products', depfile,
            '-d', output_dir
        ])

        if not self._color:
            args.append('-no-color')

        args.extend(sources)

        self.context.log.debug('Executing: %s %s' %
                               (self._main, ' '.join(args)))
        return self.runjava(self._main,
                            classpath=zinc_classpath,
                            args=args,
                            jvmargs=self._jvm_args)
Example #14
  def identify_zinc_jars(zinc_classpath):
    """Find the named jars in the zinc classpath.

    TODO: When profiles migrate to regular pants jar() deps instead of ivy.xml files we can
          make these mappings explicit instead of deriving them by jar name heuristics.
    """
    ret = OrderedDict()
    ret.update(ZincUtils.identify_jars(ZincUtils.zinc_jar_names, zinc_classpath))
    return ret
Example #15
  def identify_zinc_jars(compiler_classpath, zinc_classpath):
    """Find the named jars in the compiler and zinc classpaths.

    TODO: When profiles migrate to regular pants jar() deps instead of ivy.xml files we can make these
          mappings explicit instead of deriving them by jar name heuristics.
    """
    ret = OrderedDict()
    ret.update(ScalaCompile.identify_jars(ScalaCompile.compiler_jar_names, compiler_classpath))
    ret.update(ScalaCompile.identify_jars(ScalaCompile.zinc_jar_names, zinc_classpath))
    return ret
Example #16
    def __init__(self, older_than=120, aggregation_depth=0):
        """
    datapoints that are `older_than` will be dropped
    if aggregation_depth > 0 then we aggregate for paths up to that depth
    """
        self._older_than = older_than
        self._aggregation_depth = aggregation_depth
        self._requests_by_timestamp = OrderedDict()
        self._lock = Lock()

        super(PerPathDatapoints, self).__init__()
Example #17
def test_render_ordered():
  od = OrderedDict()
  od['a'] = 1
  od['b'] = 2
  od['c'] = 3
  assert java_options(od) == '-a 1 -b 2 -c 3'

  od = OrderedDict()
  od['c'] = 3
  od['b'] = 2
  od['a'] = 1
  assert java_options(od) == '-c 3 -b 2 -a 1'
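
The test depends on OrderedDict yielding keys in insertion order, which is why the two blocks expect different flag strings. A minimal stand-in for java_options that would satisfy these assertions (the real implementation is not shown):

  from collections import OrderedDict

  def java_options(options):
    # Render each key/value as '-key value', preserving mapping order.
    return ' '.join('-%s %s' % (key, value) for key, value in options.items())

  od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
  assert java_options(od) == '-a 1 -b 2 -c 3'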
Example #18
  def compile(self, classpath, sources, output_dir, analysis_cache, upstream_analysis_caches, depfile):
    safe_mkdir(output_dir)
    compiler_classpath = nailgun_profile_classpath(self, self._compile_profile)
    compiler_args = []

    # TODO(John Sirois): separate compiler profile from runtime profile
    compiler_args.extend([
      # Support for outputting a dependencies file of source -> class
      '-Xplugin:%s' % self.get_depemitter_plugin(),
      '-P:depemitter:file:%s' % depfile
    ])
    compiler_args.extend(self._args)

    # To pass options to scalac simply prefix with -S.
    args = ['-S' + x for x in compiler_args]

    def analysis_cache_full_path(analysis_cache_product):
      # We expect the argument to be { analysis_cache_dir, [ analysis_cache_file ]}.
      if len(analysis_cache_product) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      analysis_cache_dir, analysis_cache_files = analysis_cache_product.iteritems().next()
      if len(analysis_cache_files) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      return os.path.join(analysis_cache_dir, analysis_cache_files[0])

    # Strings of <output dir>:<full path to analysis cache file for the classes in that dir>.
    analysis_map = \
      OrderedDict([ (k, analysis_cache_full_path(v)) for k, v in upstream_analysis_caches.itermappings() ])

    if len(analysis_map) > 0:
      args.extend([ '-analysis-map', ','.join(['%s:%s' % kv for kv in analysis_map.items()]) ])
    upstream_classes_dirs = analysis_map.keys()

    zinc_classpath = nailgun_profile_classpath(self, self._zinc_profile)
    zinc_jars = ScalaCompile.identify_zinc_jars(compiler_classpath, zinc_classpath)
    for (name, jarpath) in zinc_jars.items():  # The zinc jar names are also the flag names.
      args.extend(['-%s' % name, jarpath])

    args.extend([
      '-analysis-cache', analysis_cache,
      '-log-level', self.context.options.log_level or 'info',
      '-classpath', ':'.join(zinc_classpath + classpath + upstream_classes_dirs),
      '-d', output_dir
    ])

    if not self._color:
      args.append('-no-color')

    args.extend(sources)

    self.context.log.debug('Executing: %s %s' % (self._main, ' '.join(args)))
    return self.runjava(self._main, classpath=zinc_classpath, args=args, jvmargs=self._jvm_args)
Example #19
 def reset(self):
   """Clear out the state of the BuildGraph, in particular Target mappings and dependencies."""
   self._addresses_already_closed = set()
   self._target_by_address = OrderedDict()
   self._target_dependencies_by_address = defaultdict(OrderedSet)
   self._target_dependees_by_address = defaultdict(set)
   self._derived_from_by_derivative_address = {}
Example #20
 def __init__(self,
              checkpoint_root,
              verbose=True,
              task_killer=TaskKiller,
              executor_detector=ExecutorDetector,
              task_garbage_collector=TaskGarbageCollector,
              clock=time):
   ExecutorBase.__init__(self)
   ExceptionalThread.__init__(self)
   self.daemon = True
   self._stop_event = threading.Event()
   # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
   # which they were received via a launchTask.
   self._gc_task_queue = OrderedDict()
   # cache the ExecutorDriver provided by the slave, so we can use it out
   # of band from slave-initiated callbacks.  This should be supplied by
   # ExecutorBase.registered() when the executor first registers with the
   # slave.
   self._driver = None
   self._slave_id = None  # cache the slave ID provided by the slave
   self._task_id = None  # the task_id currently being executed by the ThermosGCExecutor, if any
   self._start_time = None  # the start time of a task currently being executed, if any
   self._detector = executor_detector()
   self._collector = task_garbage_collector(root=checkpoint_root)
   self._clock = clock
   self._task_killer = task_killer
   self._checkpoint_root = checkpoint_root
   self._dropped_tasks = AtomicGauge('dropped_tasks')
   self.metrics.register(self._dropped_tasks)
Example #21
  def _calculate_sources(self, targets):
    """
    Find the appropriate source roots used for sources.

    :return: mapping of source roots to the set of sources under the roots
    """
    gentargets = OrderedSet()
    def add_to_gentargets(target):
      if self.is_gentarget(target):
        gentargets.add(target)
    self.context.build_graph.walk_transitive_dependency_graph(
      [target.address for target in targets],
      add_to_gentargets,
      postorder=True)
    sources_by_base = OrderedDict()
    # TODO(Eric Ayers) Extract this logic for general use? When using jar_source_set it is needed
    # to get the correct source root for paths outside the current BUILD tree.
    for target in gentargets:
      for source in target.sources_relative_to_buildroot():
        base = SourceRoot.find_by_path(source)
        if not base:
          base, _ = target.target_base, target.sources_relative_to_buildroot()
          self.context.log.debug('Could not find source root for {source}.'
                                 ' Missing call to SourceRoot.register()?  Fell back to {base}.'
                                 .format(source=source, base=base))
        if base not in sources_by_base:
          sources_by_base[base] = OrderedSet()
        sources_by_base[base].add(source)
    return sources_by_base
Example #22
    def attempt(context, phases, timer=None):
        """
      Attempts to reach the goals for the supplied phases, optionally recording phase timings and
      then logging them when all specified phases have completed.
    """
        executed = OrderedDict()

        # I'd rather do this in a finally block below, but some goals os.fork and each of these cause
        # finally to run, printing goal timings multiple times instead of once at the end.
        def emit_timings():
            if timer:
                for phase, timings in executed.items():
                    for goal, times in timings.items():
                        timer.log('%s:%s' % (phase, goal), times)

        try:
            # Prepare tasks roots to leaves and allow for goals introducing new goals in existing phases.
            tasks_by_goal = {}
            expanded = OrderedSet()
            prepared = set()
            round = 0
            while True:
                goals = list(Phase.execution_order(phases))
                if set(goals) == prepared:
                    break
                else:
                    round += 1
                    context.log.debug('Preparing goals in round %d' % round)
                    for goal in reversed(goals):
                        if goal not in prepared:
                            phase = Phase.of(goal)
                            expanded.add(phase)
                            context.log.debug('preparing: %s:%s' %
                                              (phase, goal.name))
                            prepared.add(goal)
                            task = goal.prepare(context)
                            tasks_by_goal[goal] = task

            # Execute phases leaves to roots
            context.log.debug('Executing goals in phases %s' %
                              ' -> '.join(map(str, reversed(expanded))))
            for phase in phases:
                Group.execute(phase,
                              tasks_by_goal,
                              context,
                              executed,
                              timer=timer)

            emit_timings()
            return 0
        except (TaskError, GoalError) as e:
            message = '%s' % e
            if message:
                print('\nFAILURE: %s\n' % e)
            else:
                print('\nFAILURE\n')
            emit_timings()
            return 1
Example #23
    def test_dump(self):
        props = OrderedDict()
        props['a'] = 1
        props['b'] = '''2
'''
        props['c'] = ' 3 : ='
        out = Compatibility.StringIO()
        Properties.dump(props, out)
        self.assertEquals('a=1\nb=2\\\n\nc=\\ 3\\ \\:\\ \\=\n', out.getvalue())
Example #24
  def __init__(self, older_than=120, aggregation_depth=0):
    """
    datapoints that are `older_than` will be dropped
    if aggregation_depth > 0 then we aggregate for paths up to that depth
    """
    self._older_than = older_than
    self._aggregation_depth = aggregation_depth
    self._requests_by_timestamp = OrderedDict()
    self._lock = Lock()

    super(PerPathDatapoints, self).__init__()
Example #25
def test_default_maybe_list():
  HELLO_WORLD = ['hello', 'world']
  assert maybe_list('hello') == ['hello']
  assert maybe_list(('hello', 'world')) == HELLO_WORLD
  assert maybe_list(['hello', 'world']) == HELLO_WORLD
  assert maybe_list(OrderedSet(['hello', 'world', 'hello'])) == HELLO_WORLD
  assert maybe_list(s for s in ('hello', 'world')) == HELLO_WORLD
  od = OrderedDict(hello=1)
  od.update(world=2)
  assert maybe_list(od) == HELLO_WORLD
  assert maybe_list([]) == []
  assert maybe_list(()) == []
  assert maybe_list(set()) == []

  with pytest.raises(ValueError):
    maybe_list(123)
  with pytest.raises(ValueError):
    maybe_list(['hello', 123])

  assert maybe_list(['hello', 123], expected_type=(str, int)) == ['hello', 123]
  assert maybe_list(['hello', 123], expected_type=(int, str)) == ['hello', 123]
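
maybe_list itself is not shown; the assertions imply it wraps a bare value in a list, materializes any iterable (so an OrderedDict contributes its keys in order), and type-checks the elements. A rough stand-in consistent with the test, not the library's actual implementation:

  from collections import OrderedDict

  def maybe_list(value, expected_type=str):
    if isinstance(value, expected_type):
      return [value]
    try:
      items = list(value)
    except TypeError:
      raise ValueError('%r is neither a %s nor an iterable' % (value, expected_type))
    for item in items:
      if not isinstance(item, expected_type):
        raise ValueError('unexpected item %r' % (item,))
    return items

  assert maybe_list('hello') == ['hello']
  assert maybe_list(OrderedDict([('hello', 1), ('world', 2)])) == ['hello', 'world']
  assert maybe_list(['hello', 123], expected_type=(str, int)) == ['hello', 123]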
Example #26
    def execution_order(phases):
        """
      Yields goals in execution order for the given phases.  Does not account for goals run
      multiple times due to grouping.
    """
        dependencies_by_goal = OrderedDict()

        def populate_dependencies(phases):
            for phase in phases:
                for goal in phase.goals():
                    if goal not in dependencies_by_goal:
                        populate_dependencies(goal.dependencies)
                        deps = OrderedSet()
                        for phasedep in goal.dependencies:
                            deps.update(phasedep.goals())
                        dependencies_by_goal[goal] = deps

        populate_dependencies(phases)

        while dependencies_by_goal:
            for goal, deps in dependencies_by_goal.items():
                if not deps:
                    dependencies_by_goal.pop(goal)
                    for _, deps in dependencies_by_goal.items():
                        if goal in deps:
                            deps.discard(goal)
                    yield goal
Example #27
 def execute_task(name, task, targets):
     """Execute and time a single goal that has had all of its dependencies satisfied."""
     try:
         # TODO (Senthil Kumaran):
         # Possible refactoring of the Task Execution Logic (AWESOME-1019)
         if getattr(context.options, 'explain', None):
             context.log.debug(
                 "Skipping execution of %s in explain mode" % name)
         else:
             task.execute(targets)
     finally:
         if phase not in executed:
             executed[phase] = OrderedDict()
Example #28
  def compile(self, classpath, sources, output_dir, analysis_cache, upstream_analysis_caches, depfile):
    # To pass options to scalac simply prefix with -S.
    args = ['-S' + x for x in self._args]

    def analysis_cache_full_path(analysis_cache_product):
      # We expect the argument to be { analysis_cache_dir, [ analysis_cache_file ]}.
      if len(analysis_cache_product) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      analysis_cache_dir, analysis_cache_files = analysis_cache_product.iteritems().next()
      if len(analysis_cache_files) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      return os.path.join(analysis_cache_dir, analysis_cache_files[0])

    # Strings of <output dir>:<full path to analysis cache file for the classes in that dir>.
    analysis_map = \
      OrderedDict([ (k, analysis_cache_full_path(v)) for k, v in upstream_analysis_caches.itermappings() ])

    args.extend(self._zinc_jar_args)

    if len(analysis_map) > 0:
      args.extend([ '-analysis-map', ','.join(['%s:%s' % kv for kv in analysis_map.items()]) ])

    args.extend([
      '-analysis-cache', analysis_cache,
      '-log-level', self.context.options.log_level or 'info',
      '-classpath', ':'.join(self._zinc_classpath + classpath),
      '-output-products', depfile,
      '-mirror-analysis',
      '-d', output_dir
    ])

    if not self._color:
      args.append('-no-color')

    args.extend(sources)

    self.context.log.debug('Executing: %s %s' % (self._main, ' '.join(args)))
    return self.runjava(self._main, classpath=self._zinc_classpath, args=args, jvmargs=self._jvm_args)
Example #29
    def attempt(context, phases):
        """Attempts to reach the goals for the supplied phases."""
        executed = OrderedDict()

        try:
            # Prepare tasks roots to leaves and allow for goals introducing new goals in existing phases.
            tasks_by_goal = {}
            expanded = OrderedSet()
            prepared = set()
            round_ = 0
            while True:
                goals = list(Phase.execution_order(phases))
                if set(goals) == prepared:
                    break
                else:
                    round_ += 1
                    context.log.debug('Preparing goals in round %d' % round_)
                    for goal in reversed(goals):
                        if goal not in prepared:
                            phase = Phase.of(goal)
                            expanded.add(phase)
                            context.log.debug('preparing: %s:%s' %
                                              (phase, goal.name))
                            prepared.add(goal)
                            task = goal.prepare(context)
                            tasks_by_goal[goal] = task

            # Execute phases leaves to roots
            execution_phases = ' -> '.join(map(str, reversed(expanded)))

            context.log.debug('Executing goals in phases %s' %
                              execution_phases)

            if getattr(context.options, 'explain', None):
                print("Phase Execution Order:\n\n%s\n" % execution_phases)
                print("Phase [Goal->Task] Order:\n")

            for phase in phases:
                Group.execute(phase, tasks_by_goal, context, executed)

            ret = 0
        except (TargetDefinitionException, TaskError, GoalError) as e:

            message = '%s' % e
            if message:
                print('\nFAILURE: %s\n' % e)
            else:
                print('\nFAILURE\n')
            ret = 1
        return ret
Example #30
def test_default_maybe_list():
    HELLO_WORLD = ['hello', 'world']
    assert maybe_list('hello') == ['hello']
    assert maybe_list(('hello', 'world')) == HELLO_WORLD
    assert maybe_list(['hello', 'world']) == HELLO_WORLD
    assert maybe_list(OrderedSet(['hello', 'world', 'hello'])) == HELLO_WORLD
    assert maybe_list(s for s in ('hello', 'world')) == HELLO_WORLD
    od = OrderedDict(hello=1)
    od.update(world=2)
    assert maybe_list(od) == HELLO_WORLD
    assert maybe_list([]) == []
    assert maybe_list(()) == []
    assert maybe_list(set()) == []

    with pytest.raises(ValueError):
        maybe_list(123)
    with pytest.raises(ValueError):
        maybe_list(['hello', 123])

    assert maybe_list(['hello', 123],
                      expected_type=(str, int)) == ['hello', 123]
    assert maybe_list(['hello', 123],
                      expected_type=(int, str)) == ['hello', 123]
Example #31
 def execute_task(name, task, targets):
     """Execute and time a single goal that has had all of its dependencies satisfied."""
     start = timer.now() if timer else None
     try:
         task.execute(targets)
     finally:
         elapsed = timer.now() - start if timer else None
         if phase not in executed:
             executed[phase] = OrderedDict()
         if elapsed:
             phase_timings = executed[phase]
             if name not in phase_timings:
                 phase_timings[name] = []
             phase_timings[name].append(elapsed)
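
The finally block accumulates timings into a nested OrderedDict of phase -> goal name -> elapsed samples, so a later report can print once per phase and goal in execution order. A compact sketch of that accumulation:

  from collections import OrderedDict

  executed = OrderedDict()

  def record_timing(phase, name, elapsed):
    phase_timings = executed.setdefault(phase, OrderedDict())
    phase_timings.setdefault(name, []).append(elapsed)

  record_timing('compile', 'scalac', 1.2)
  record_timing('compile', 'scalac', 0.8)
  record_timing('test', 'junit', 3.4)
  for phase, timings in executed.items():
    for name, times in timings.items():
      print('%s:%s %.1fs total' % (phase, name, sum(times)))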
Example #32
 def execute_task(name, task, targets):
     """Execute and time a single goal that has had all of its dependencies satisfied."""
     # We want the key for this group; we can find it using any representative member.
     # This first one is easy.
     try:
         # TODO (Senthil Kumaran):
         # Possible refactoring of the Task Execution Logic (AWESOME-1019)
         if getattr(context.options, 'explain', None):
             context.log.debug(
                 "Skipping execution of %s in explain mode" % name)
         else:
             task.execute(targets)
     finally:
         if phase not in executed:
             executed[phase] = OrderedDict()
Example #33
    def _calculate_sources(self, targets):
        walked_targets = set()
        for target in targets:
            walked_targets.update(t for t in target.closure()
                                  if self.is_gentarget(t))

        sources_by_base = OrderedDict()
        for target in self.context.build_graph.targets():
            if target in walked_targets:
                base, sources = target.target_base, target.sources_relative_to_buildroot(
                )
                if base not in sources_by_base:
                    sources_by_base[base] = OrderedSet()
                sources_by_base[base].update(sources)
        return sources_by_base
Example #34
  def identify_zinc_jars(compiler_classpath, zinc_classpath):
    """Find the named jars in the compiler and zinc classpaths.

    TODO: When profiles migrate to regular pants jar() deps instead of ivy.xml files we can make these
          mappings explicit instead of deriving them by jar name heuristics.
    """
    ret = OrderedDict()
    ret.update(ScalaCompile.identify_jars(ScalaCompile.compiler_jar_names, compiler_classpath))
    ret.update(ScalaCompile.identify_jars(ScalaCompile.zinc_jar_names, zinc_classpath))
    return ret
Example #35
 def _calculate_sources(self, targets):
   def add_to_gentargets(target):
     if self.is_gentarget(target):
       gentargets.add(target)
   gentargets = OrderedSet()
   self.context.build_graph.walk_transitive_dependency_graph(
     [target.address for target in targets],
     add_to_gentargets,
     postorder=True)
   sources_by_base = OrderedDict()
   for target in gentargets:
     base, sources = target.target_base, target.sources_relative_to_buildroot()
     if base not in sources_by_base:
       sources_by_base[base] = OrderedSet()
     sources_by_base[base].update(sources)
   return sources_by_base
Example #36
  def _parse(lines):
    def coalesce_lines():
      line_iter = iter(lines)
      try:
        buffer = ''
        while True:
          line = next(line_iter)
          if line.strip().endswith('\\'):
            # Continuation.
            buffer += line.strip()[:-1]
          else:
            if buffer:
              # Continuation join, preserve left hand ws (could be a kv separator)
              buffer += line.rstrip()
            else:
              # Plain old line
              buffer = line.strip()

            try:
              yield buffer
            finally:
              buffer = ''
      except StopIteration:
        pass

    def normalize(atom):
      return re.sub(r'\\([:=\s])', r'\1', atom.strip())

    def parse_line(line):
      if line and not (line.startswith('#') or line.startswith('!')):
        match = Properties._EXPLICIT_KV_SEP.search(line)
        if match:
          return normalize(line[:match.start()]), normalize(line[match.end():])
        else:
          space_sep = line.find(' ')
          if space_sep == -1:
            return normalize(line), ''
          else:
            return normalize(line[:space_sep]), normalize(line[space_sep:])

    props = OrderedDict()
    for line in coalesce_lines():
      kv_pair = parse_line(line)
      if kv_pair:
        key, value = kv_pair
        props[key] = value
    return props
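
Accumulating into an OrderedDict means _parse returns keys in the order they appear in the properties file, which keeps later serialization stable. A simplified sketch of that ordering guarantee, handling only bare key=value lines (none of the continuation or escape processing shown above):

  from collections import OrderedDict

  def parse_simple_properties(lines):
    props = OrderedDict()
    for line in lines:
      line = line.strip()
      if not line or line.startswith(('#', '!')):
        continue
      key, _, value = line.partition('=')
      props[key.strip()] = value.strip()
    return props

  text = ['# demo', 'first=1', 'second=2', 'third=3']
  print(list(parse_simple_properties(text)))  # ['first', 'second', 'third'] -- file order kept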
Example #37
class PerPathDatapoints(Thread):
    PURGE_SLEEP_TIME = 2  # sleep time between purging old datapoints
    DEFAULT_TOP_RESULTS = 10  # number of (top) results to show by default

    def __init__(self, older_than=120, aggregation_depth=0):
        """
    datapoints that are `older_than` will be dropped
    if aggregation_depth > 0 then we aggregate for paths up to that depth
    """
        self._older_than = older_than
        self._aggregation_depth = aggregation_depth
        self._requests_by_timestamp = OrderedDict()
        self._lock = Lock()

        super(PerPathDatapoints, self).__init__()

    def size(self):
        size = {"samples": 0, "requests_mem_usage": 0}
        with self._lock:
            samples, mem_usage = 0, 0
            for reqs in self._requests_by_timestamp.values():
                samples += len(reqs)
                mem_usage += sum(sys.getsizeof(r) for r in reqs)

        size["samples"] = samples
        size["requests_mem_usage"] = mem_usage
        size["requests_mem_usage"] = sizeof_fmt(size["requests_mem_usage"])
        size["ordered_dict_mem_usage"] = sizeof_fmt(
            sys.getsizeof(self._requests_by_timestamp))

        return size

    def run(self):
        """ drop samples that are too old """
        while True:
            time.sleep(self.PURGE_SLEEP_TIME)
            old_tstamp = time.time() - self._older_than
            with self._lock:
                for tstamp in self._requests_by_timestamp.keys():
                    if tstamp < old_tstamp:
                        del self._requests_by_timestamp[tstamp]

    def handle_request(self, request):
        if self._aggregation_depth > 0:
            request.path = intern(request.parent_path(self._aggregation_depth))

        with self._lock:
            tstamp = int(time.time())
            if tstamp not in self._requests_by_timestamp:
                self._requests_by_timestamp[tstamp] = []
            self._requests_by_timestamp[tstamp].append(request)

    def sum_minute(self,
                   top=DEFAULT_TOP_RESULTS,
                   order_by=Counters.WRITES,
                   display=[Counters.ALL],
                   view=AccumulatedStats.VIEW_BY_PATH):
        now = int(time.time())
        old = now - NUMBER_OF_DATAPOINTS
        stats = AccumulatedStats(StatsConfig())

        with self._lock:
            # note that this is an OrderedDict so samples are in chronological order
            for tstamp in self._requests_by_timestamp.keys():
                if tstamp < old:
                    continue

                if tstamp > now:
                    break

                for r in self._requests_by_timestamp[tstamp]:
                    stats.handle_request(r)

        return stats.dict(top=top,
                          order_by=order_by,
                          display_filters=display,
                          view=view)

    def datapoints_writes(self):
        return self._filter_datapoints(condition=lambda req: req.is_write)

    def datapoints_reads(self):
        return self._filter_datapoints(condition=lambda req: not req.is_write)

    def datapoints_for_op(self, op):
        return self._filter_datapoints(condition=lambda req: req.opcode == op)

    def datapoints_by_path_for_op(self, op, top):
        """ op is "writes" or "reads" or one of OpCodes.CREATE, OpCodes.SETDATA, etc.
        because why use Python if you can't abuse types?
        top is the number of results
    """
        if op == "writes":
            return self._datapoints_by_path_for_op_impl(
                lambda r: r.is_write, top)
        elif op == "reads":
            return self._datapoints_by_path_for_op_impl(
                lambda r: not r.is_write, top)
        else:
            return self._datapoints_by_path_for_op_impl(
                lambda r: r.opcode == op, top)

    def _datapoints_by_path_for_op_impl(self, request_filter, top):
        """ to make this moderately efficient we use a dict that
    provides a pre-populated list of datapoints.
    """
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = PathDatapoints()
        with self._lock:
            for i in range(0, NUMBER_OF_DATAPOINTS):
                if tstamp in self._requests_by_timestamp:
                    for req in self._requests_by_timestamp[tstamp]:
                        if request_filter(req):
                            dp = datapoints[req.path][i][1] + 1
                            datapoints[req.path][i] = (i, dp)
                tstamp += 1

        # sort
        def comparator(path_a, path_b):
            sum_a = sum(d[1] for d in datapoints[path_a])
            sum_b = sum(d[1] for d in datapoints[path_b])
            return sum_b - sum_a

        paths = sorted(datapoints.keys(), comparator)

        if len(paths) == 0:
            return [("/", datapoints["/"])]

        return [(p, datapoints[p]) for p in paths[0:top]]

    def _filter_datapoints(self, condition):
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = []
        for i in range(0, NUMBER_OF_DATAPOINTS):
            aggregate = sum(
                bool(condition(req))
                for req in self._requests_by_timestamp.get(tstamp, []))
            datapoints.append((i, aggregate))
            tstamp += 1

        return datapoints
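
PerPathDatapoints buckets requests under an integer epoch second, so the OrderedDict stays in chronological key order and stale buckets can be purged from the front. A condensed sketch of that bucket-and-purge pattern outside the class:

  import time
  from collections import OrderedDict

  samples_by_second = OrderedDict()

  def record(sample, now=None):
    tstamp = int(now if now is not None else time.time())
    samples_by_second.setdefault(tstamp, []).append(sample)

  def purge(older_than, now=None):
    cutoff = (now if now is not None else time.time()) - older_than
    # Insertion order is chronological (timestamps only grow), so stale buckets sit at the front.
    for tstamp in list(samples_by_second):
      if tstamp < cutoff:
        del samples_by_second[tstamp]
      else:
        break

  record('req-a', now=100)
  record('req-b', now=130)
  purge(older_than=20, now=140)
  print(list(samples_by_second))  # [130] -- the t=100 bucket was dropped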
Example #38
class ThermosGCExecutor(ExecutorBase, ExceptionalThread, Observable):
    """
    Thermos GC Executor, responsible for:
      - garbage collecting old tasks to make sure they don't clutter up the system
      - state reconciliation with the scheduler (in case it thinks we're running
        something we're not or vice versa.)
  """
    MAX_PID_TIME_DRIFT = Amount(10, Time.SECONDS)
    MAX_CHECKPOINT_TIME_DRIFT = Amount(
        1, Time.HOURS)  # maximum runner disconnection time

    # how old a task must be before we're willing to kill it, assuming that there could be
    # slight races in the following scenario:
    #    launch gc with retained_tasks={t1, t2, t3}
    #    launch task t4
    MINIMUM_KILL_AGE = Amount(10, Time.MINUTES)

    # wait time between checking for new GC events from the slave and/or cleaning orphaned tasks
    POLL_WAIT = Amount(5, Time.MINUTES)

    # maximum amount of time the executor will wait with no tasks before it exits.
    MAXIMUM_EXECUTOR_WAIT = Amount(15, Time.MINUTES)

    # maximum lifetime of this executor.  this is to prevent older GC executor binaries from
    # running forever
    MAXIMUM_EXECUTOR_LIFETIME = Amount(1, Time.DAYS)

    PERSISTENCE_WAIT = Amount(5, Time.SECONDS)

    def __init__(self,
                 checkpoint_root,
                 verbose=True,
                 task_killer=TaskKiller,
                 executor_detector=ExecutorDetector,
                 task_garbage_collector=TaskGarbageCollector,
                 clock=time):
        ExecutorBase.__init__(self)
        ExceptionalThread.__init__(self)
        self.daemon = True
        self._stop_event = threading.Event()
        # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
        # which they were received via a launchTask.
        self._gc_task_queue = OrderedDict()
        # cache the ExecutorDriver provided by the slave, so we can use it out
        # of band from slave-initiated callbacks.  This should be supplied by
        # ExecutorBase.registered() when the executor first registers with the
        # slave.
        self._driver = None
        self._slave_id = None  # cache the slave ID provided by the slave
        self._task_id = None  # the task_id currently being executed by the ThermosGCExecutor, if any
        self._start_time = None  # the start time of a task currently being executed, if any
        self._detector = executor_detector()
        self._collector = task_garbage_collector(root=checkpoint_root)
        self._clock = clock
        self._task_killer = task_killer
        self._checkpoint_root = checkpoint_root
        self._dropped_tasks = AtomicGauge('dropped_tasks')
        self.metrics.register(self._dropped_tasks)

    def _runner_ckpt(self, task_id):
        """Return the runner checkpoint file for a given task_id."""
        return TaskPath(root=self._checkpoint_root,
                        task_id=task_id).getpath('runner_checkpoint')

    def _terminate_task(self, task_id, kill=True):
        """Terminate a task using the associated task killer. Returns a boolean indicating success."""
        killer = self._task_killer(task_id, self._checkpoint_root)
        self.log('Terminating %s...' % task_id)
        runner_terminate = killer.kill if kill else killer.lose
        try:
            runner_terminate(force=True)
            return True
        except Exception as e:
            self.log('Could not terminate: %s' % e)
            return False

    def partition_tasks(self):
        """Return active/finished tasks as discovered from the checkpoint root."""
        detector = TaskDetector(root=self._checkpoint_root)
        active_tasks = set(
            t_id for _, t_id in detector.get_task_ids(state='active'))
        finished_tasks = set(
            t_id for _, t_id in detector.get_task_ids(state='finished'))
        return active_tasks, finished_tasks

    def get_states(self, task_id):
        """Returns the (timestamp, status) tuples of the task or [] if could not replay."""
        statuses = CheckpointDispatcher.iter_statuses(
            self._runner_ckpt(task_id))
        try:
            return [(state.timestamp_ms / 1000.0, state.state)
                    for state in statuses]
        except CheckpointDispatcher.ErrorRecoveringState:
            return []

    def get_sandbox(self, task_id):
        """Returns the sandbox of the task, or None if it has not yet been initialized."""
        try:
            for update in CheckpointDispatcher.iter_updates(
                    self._runner_ckpt(task_id)):
                if update.runner_header and update.runner_header.sandbox:
                    return update.runner_header.sandbox
        except CheckpointDispatcher.ErrorRecoveringState:
            return None

    def maybe_terminate_unknown_task(self, task_id):
        """Terminate a task if we believe the scheduler doesn't know about it.

       It's possible for the scheduler to queue a GC and launch a task afterwards, in which
       case we may see actively running tasks that the scheduler did not report in the
       AdjustRetainedTasks message.

       Returns:
         boolean indicating whether the task was terminated
    """
        states = self.get_states(task_id)
        if states:
            task_start_time, _ = states[0]
            if self._start_time - task_start_time > self.MINIMUM_KILL_AGE.as_(
                    Time.SECONDS):
                return self._terminate_task(task_id)
        return False

    def should_gc_task(self, task_id):
        """Check if a possibly-corrupt task should be locally GCed

      A task should be GCed if its checkpoint stream appears to be corrupted and the kill age
      threshold is exceeded.

       Returns:
         set, containing the task_id if it should be marked for local GC, or empty otherwise
    """
        runner_ckpt = self._runner_ckpt(task_id)
        if not os.path.exists(runner_ckpt):
            return set()
        latest_update = os.path.getmtime(runner_ckpt)
        if self._start_time - latest_update > self.MINIMUM_KILL_AGE.as_(
                Time.SECONDS):
            self.log(
                'Got corrupt checkpoint file for %s - marking for local GC' %
                task_id)
            return set([task_id])
        else:
            self.log(
                'Checkpoint file unreadable, but not yet beyond MINIMUM_KILL_AGE threshold'
            )
            return set()

    def reconcile_states(self, driver, retained_tasks):
        """Reconcile states that the scheduler thinks tasks are in vs what they really are in.

        Local    vs   Scheduler  => Action
       ===================================
        ACTIVE         ACTIVE    => no-op
        ACTIVE        STARTING   => no-op
        ACTIVE        TERMINAL   => maybe kill task*
        ACTIVE        !EXISTS    => maybe kill task*
       TERMINAL        ACTIVE    => send actual status**
       TERMINAL       STARTING   => send actual status**
       TERMINAL       TERMINAL   => no-op
       TERMINAL       !EXISTS    => gc locally
       !EXISTS         ACTIVE    => send LOST**
       !EXISTS        STARTING   => no-op
       !EXISTS        TERMINAL   => gc remotely

       * - Only kill if this does not appear to be a race condition.
       ** - These appear to have no effect

       Side effecting operations:
         ACTIVE   | (TERMINAL / !EXISTS) => maybe kill
         TERMINAL | !EXISTS              => delete
         !EXISTS  | TERMINAL             => delete

      Returns tuple of (local_gc, remote_gc, updates), where:
        local_gc - set of task_ids to be GCed locally
        remote_gc - set of task_ids to be deleted on the scheduler
        updates - dictionary of updates sent to the scheduler (task_id: ScheduleStatus)
    """
        def partition(rt):
            active, starting, finished = set(), set(), set()
            for task_id, schedule_status in rt.items():
                if schedule_status in TERMINAL_STATES:
                    finished.add(task_id)
                elif (schedule_status == ScheduleStatus.STARTING
                      or schedule_status == ScheduleStatus.ASSIGNED):
                    starting.add(task_id)
                else:
                    active.add(task_id)
            return active, starting, finished

        local_active, local_finished = self.partition_tasks()
        sched_active, sched_starting, sched_finished = partition(
            retained_tasks)
        local_task_ids = local_active | local_finished
        sched_task_ids = sched_active | sched_starting | sched_finished
        all_task_ids = local_task_ids | sched_task_ids

        self.log('Told to retain the following task ids:')
        for task_id, schedule_status in retained_tasks.items():
            self.log('  => %s as %s' % (task_id,
                                        ScheduleStatus._VALUES_TO_NAMES.get(
                                            schedule_status, 'UNKNOWN')))

        self.log('Local active tasks:')
        for task_id in local_active:
            self.log('  => %s' % task_id)

        self.log('Local finished tasks:')
        for task_id in local_finished:
            self.log('  => %s' % task_id)

        local_gc, remote_gc = set(), set()
        updates = {}

        for task_id in all_task_ids:
            if task_id in local_active and task_id not in (sched_active
                                                           | sched_starting):
                self.log('Inspecting task %s for termination.' % task_id)
                if not self.maybe_terminate_unknown_task(task_id):
                    local_gc.update(self.should_gc_task(task_id))
            if task_id in local_finished and task_id not in sched_task_ids:
                self.log('Queueing task %s for local deletion.' % task_id)
                local_gc.add(task_id)
            if task_id in local_finished and task_id in (sched_active
                                                         | sched_starting):
                self.log(
                    'Task %s finished but scheduler thinks active/starting.' %
                    task_id)
                states = self.get_states(task_id)
                if states:
                    _, last_state = states[-1]
                    updates[task_id] = THERMOS_TO_TWITTER_STATES.get(
                        last_state, ScheduleStatus.SANDBOX_DELETED)
                    self.send_update(
                        driver, task_id,
                        THERMOS_TO_MESOS_STATES.get(last_state,
                                                    mesos_pb2.TASK_LOST),
                        'Task finish detected by GC executor.')
                else:
                    local_gc.update(self.should_gc_task(task_id))
            if task_id in sched_finished and task_id not in local_task_ids:
                self.log('Queueing task %s for remote deletion.' % task_id)
                remote_gc.add(task_id)
            if task_id not in local_task_ids and task_id in sched_active:
                self.log(
                    'Know nothing about task %s, telling scheduler of LOSS.' %
                    task_id)
                updates[task_id] = ScheduleStatus.LOST
                self.send_update(driver, task_id, mesos_pb2.TASK_LOST,
                                 'GC executor found no trace of task.')
            if task_id not in local_task_ids and task_id in sched_starting:
                self.log(
                    'Know nothing about task %s, but scheduler says STARTING - passing'
                    % task_id)

        return local_gc, remote_gc, updates

    def clean_orphans(self, driver):
        """Inspect checkpoints for trees that have been kill -9'ed but not properly cleaned up."""
        self.log('Checking for orphaned tasks')
        active_tasks, _ = self.partition_tasks()
        updates = {}

        inspector = CheckpointInspector(self._checkpoint_root)

        def is_our_process(process, uid, timestamp):
            if process.uids().real != uid:
                return False
            estimated_start_time = self._clock.time() - process.create_time()
            return abs(timestamp -
                       estimated_start_time) < self.MAX_PID_TIME_DRIFT.as_(
                           Time.SECONDS)

        for task_id in active_tasks:
            self.log('Inspecting running task: %s' % task_id)
            inspection = inspector.inspect(task_id)
            if not inspection:
                self.log('  - Error inspecting task runner')
                continue
            latest_runner = inspection.runner_processes[-1]
            # Assume that it has not yet started?
            if not latest_runner:
                self.log('  - Task has no registered runners.')
                continue
            runner_pid, runner_uid, timestamp_ms = latest_runner
            try:
                runner_process = psutil.Process(runner_pid)
                if is_our_process(runner_process, runner_uid,
                                  timestamp_ms / 1000.0):
                    self.log('  - Runner appears healthy.')
                    continue
            except psutil.NoSuchProcess:
                # Runner is dead
                pass
            except psutil.Error as err:
                self.log('  - Error sampling runner process [pid=%s]: %s' %
                         (runner_pid, err))
                continue
            try:
                latest_update = os.path.getmtime(self._runner_ckpt(task_id))
            except (IOError, OSError) as err:
                self.log('  - Error accessing runner ckpt: %s' % err)
                continue
            if self._clock.time(
            ) - latest_update < self.MAX_CHECKPOINT_TIME_DRIFT.as_(
                    Time.SECONDS):
                self.log('  - Runner is dead but under LOST threshold.')
                continue
            self.log('  - Runner is dead but beyond LOST threshold: %.1fs' %
                     (self._clock.time() - latest_update))
            if self._terminate_task(task_id, kill=False):
                updates[task_id] = ScheduleStatus.LOST
                self.send_update(driver, task_id, mesos_pb2.TASK_LOST,
                                 'GC executor detected failed task runner.')

        return updates

    def _erase_sandbox(self, task_id):
        # TODO(wickman) Only mesos should be in the business of garbage collecting sandboxes.
        header_sandbox = self.get_sandbox(task_id)
        directory_sandbox = DirectorySandbox(
            header_sandbox) if header_sandbox else None
        if directory_sandbox and directory_sandbox.exists():
            self.log('Destroying DirectorySandbox for %s' % task_id)
            try:
                directory_sandbox.destroy()
            except DirectorySandbox.Error as e:
                self.log('Failed to destroy DirectorySandbox: %s' % e)
        else:
            self.log('Found no sandboxes for %s' % task_id)

    def _gc(self, task_id):
        """Erase the sandbox, logs and metadata of the given task."""
        self.log('Erasing sandbox for %s' % task_id)
        self._erase_sandbox(task_id)
        self.log('Erasing logs for %s' % task_id)
        self._collector.erase_logs(task_id)
        self.log('Erasing metadata for %s' % task_id)
        self._collector.erase_metadata(task_id)

    def garbage_collect(self, force_delete=frozenset()):
        """Garbage collect tasks on the system no longer active or in the supplied force_delete.

    Return a set of task_ids representing the tasks that were garbage collected.
    """
        active_tasks, finished_tasks = self.partition_tasks()
        retained_executors = set(iter(self.linked_executors))
        self.log('Executor sandboxes retained by Mesos:')
        if retained_executors:
            for r_e in sorted(retained_executors):
                self.log('  %s' % r_e)
        else:
            self.log('  None')
        for task_id in (active_tasks - retained_executors):
            self.log('ERROR: Active task %s had its executor sandbox pulled.' %
                     task_id)
        gc_tasks = (finished_tasks - retained_executors) | force_delete
        for gc_task in gc_tasks:
            self._gc(gc_task)
        return gc_tasks

    @property
    def linked_executors(self):
        """Generator yielding the executor sandboxes detected on the system."""
        thermos_executor_prefix = 'thermos-'
        for executor in self._detector:
            # It's possible for just the 'latest' symlink to be present but no run directories.
            # This indicates that the task has been fully garbage collected.
            if executor.executor_id.startswith(
                    thermos_executor_prefix) and executor.run != 'latest':
                yield executor.executor_id[len(thermos_executor_prefix):]

    def _run_gc(self, task, retain_tasks, retain_start):
        """
      Reconcile the set of tasks to retain (provided by the scheduler) with the current state of
      executors on this system. Garbage collect tasks/executors as appropriate.

      Not re-entrant! Previous executions must complete (and clear self._task_id) before this can be
      invoked.

      Potentially blocking (e.g. on I/O) in self.garbage_collect()

      Args:
        task: TaskInfo provided by the slave
        retain_tasks: mapping of task_id => ScheduleStatus, describing what the scheduler thinks is
                      running on this system
        retain_start: the time at which the retain_tasks message is effective -- this means that
                      tasks started after the retain_tasks message is effective are skipped
                      until future GC runs.
    """
        task_id = task.task_id.value
        if self._task_id is not None:
            raise RuntimeError(
                '_run_gc() called [task_id=%s], but already running [task_id=%s]'
                % (task_id, self._task_id))
        self._task_id = task_id
        self.log('Launching garbage collection [task_id=%s]' % task_id)
        self._start_time = retain_start
        local_gc, remote_gc, _ = self.reconcile_states(self._driver,
                                                       retain_tasks)
        deleted_tasks = set(retain_tasks).intersection(
            self.garbage_collect(local_gc)) | remote_gc
        if deleted_tasks:
            self._driver.sendFrameworkMessage(
                thrift_serialize(
                    SchedulerMessage(deletedTasks=DeletedTasks(
                        taskIds=deleted_tasks))))
        self.send_update(self._driver, task.task_id.value,
                         mesos_pb2.TASK_FINISHED,
                         'Garbage collection finished.')
        self.log('Garbage collection complete [task_id=%s]' % task_id)
        self._task_id = self._start_time = None

    def run(self):
        """Main GC executor event loop.

      Periodically perform state reconciliation with the set of tasks provided
      by the slave, and garbage collect orphaned tasks on the system.
    """
        run_start = self._clock.time()
        last_gc_run = self._clock.time()

        def should_terminate():
            now = self._clock.time()
            if now > run_start + self.MAXIMUM_EXECUTOR_LIFETIME.as_(
                    Time.SECONDS):
                return True
            if now > last_gc_run + self.MAXIMUM_EXECUTOR_WAIT.as_(
                    Time.SECONDS):
                return True
            return self._stop_event.is_set()

        while not should_terminate():
            try:
                _, (task, retain_tasks,
                    retain_start) = self._gc_task_queue.popitem(0)
                last_gc_run = retain_start
                self._run_gc(task, retain_tasks, retain_start)
            except KeyError:  # no enqueued GC tasks
                pass
            if self._driver is not None:
                self.clean_orphans(self._driver)
            self._stop_event.wait(self.POLL_WAIT.as_(Time.SECONDS))

        # shutdown
        if self._driver is not None:
            try:
                prev_task_id, _ = self._gc_task_queue.popitem(0)
            except KeyError:  # no enqueued GC tasks
                pass
            else:
                self.send_update(
                    self._driver, prev_task_id, mesos_pb2.TASK_FINISHED,
                    'Garbage collection skipped - GC executor shutting down')
                # TODO(jon) Remove this once external MESOS-243 is resolved.
                self.log(
                    'Sleeping briefly to mitigate https://issues.apache.org/jira/browse/MESOS-243'
                )
                self._clock.sleep(self.PERSISTENCE_WAIT.as_(Time.SECONDS))

            self._driver.stop()

    """ Mesos Executor API methods follow """

    def launchTask(self, driver, task):
        """Queue a new garbage collection run, and drop any currently-enqueued runs."""
        if self._slave_id is None:
            self._slave_id = task.slave_id.value
        task_id = task.task_id.value
        self.log('launchTask() got task_id: %s' % task_id)
        if task_id == self._task_id:
            self.log('=> GC with task_id %s currently running - ignoring' %
                     task_id)
            return
        elif task_id in self._gc_task_queue:
            self.log('=> Already have task_id %s queued - ignoring' % task_id)
            return
        try:
            art = thrift_deserialize(AdjustRetainedTasks(), task.data)
        except Exception as err:
            self.log('Error deserializing task: %s' % err)
            self.send_update(self._driver, task_id, mesos_pb2.TASK_FAILED,
                             'Deserialization of GC task failed')
            return
        try:
            prev_task_id, _ = self._gc_task_queue.popitem(0)
        except KeyError:  # no enqueued GC tasks - reset counter
            self._dropped_tasks.write(0)
        else:
            self.log('=> Dropping previously queued GC with task_id %s' %
                     prev_task_id)
            self._dropped_tasks.increment()
            self.log('=> Updating scheduler')
            self.send_update(
                self._driver, prev_task_id, mesos_pb2.TASK_FINISHED,
                'Garbage collection skipped - GC executor received another task'
            )
        self.log('=> Adding %s to GC queue' % task_id)
        self._gc_task_queue[task_id] = (task, art.retainedTasks,
                                        self._clock.time())

    def killTask(self, driver, task_id):
        """Remove the specified task from the queue, if it's not yet run. Otherwise, no-op."""
        self.log('killTask() got task_id: %s' % task_id)
        task = self._gc_task_queue.pop(task_id, None)
        if task is not None:
            self.log('=> Removed %s from queued GC tasks' % task_id)
        elif task_id == self._task_id:
            self.log('=> GC with task_id %s currently running - ignoring' %
                     task_id)
        else:
            self.log('=> Unknown task_id %s - ignoring' % task_id)

    def shutdown(self, driver):
        """Trigger the Executor to shut down as soon as the current GC run is finished."""
        self.log('shutdown() called - setting stop event')
        self._stop_event.set()
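
The launchTask()/run() pair above keeps at most one queued GC request: a new request drops
whatever is already enqueued, and the run() loop drains the queue oldest-first via
OrderedDict.popitem(0). A minimal, self-contained sketch of that queue discipline (the names
below are illustrative only, not part of the executor):

from collections import OrderedDict

gc_task_queue = OrderedDict()

def enqueue(task_id, payload):
    # Drop any previously queued request, mirroring launchTask().
    try:
        prev_task_id, _ = gc_task_queue.popitem(0)
        print('dropping previously queued GC %s' % prev_task_id)
    except KeyError:
        pass  # nothing was queued
    gc_task_queue[task_id] = payload

enqueue('gc-1', {'retain': {}})
enqueue('gc-2', {'retain': {}})               # 'gc-1' is dropped here
task_id, payload = gc_task_queue.popitem(0)   # the run() loop pops oldest-first
print(task_id)                                # -> gc-2
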
class ThermosGCExecutor(ExecutorBase, ExceptionalThread, Observable):
  """
    Thermos GC Executor, responsible for:
      - garbage collecting old tasks to make sure they don't clutter up the system
      - state reconciliation with the scheduler (in case it thinks we're running
        something we're not or vice versa.)
  """
  MAX_PID_TIME_DRIFT = Amount(10, Time.SECONDS)
  MAX_CHECKPOINT_TIME_DRIFT = Amount(1, Time.HOURS)  # maximum runner disconnection time

  # how old a task must be before we're willing to kill it, assuming that there could be
  # slight races in the following scenario:
  #    launch gc with retained_tasks={t1, t2, t3}
  #    launch task t4
  MINIMUM_KILL_AGE = Amount(10, Time.MINUTES)

  # wait time between checking for new GC events from the slave and/or cleaning orphaned tasks
  POLL_WAIT = Amount(5, Time.MINUTES)

  # maximum amount of time the executor will wait with no tasks before it exits.
  MAXIMUM_EXECUTOR_WAIT = Amount(15, Time.MINUTES)

  # maximum lifetime of this executor.  this is to prevent older GC executor binaries from
  # running forever
  MAXIMUM_EXECUTOR_LIFETIME = Amount(1, Time.DAYS)

  PERSISTENCE_WAIT = Amount(5, Time.SECONDS)

  def __init__(self,
               checkpoint_root,
               verbose=True,
               task_killer=TaskKiller,
               executor_detector=ExecutorDetector,
               task_garbage_collector=TaskGarbageCollector,
               clock=time):
    ExecutorBase.__init__(self)
    ExceptionalThread.__init__(self)
    self.daemon = True
    self._stop_event = threading.Event()
    # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
    # which they were received via a launchTask.
    self._gc_task_queue = OrderedDict()
    # cache the ExecutorDriver provided by the slave, so we can use it out
    # of band from slave-initiated callbacks.  This should be supplied by
    # ExecutorBase.registered() when the executor first registers with the
    # slave.
    self._driver = None
    self._slave_id = None  # cache the slave ID provided by the slave
    self._task_id = None  # the task_id currently being executed by the ThermosGCExecutor, if any
    self._start_time = None  # the start time of a task currently being executed, if any
    self._detector = executor_detector()
    self._collector = task_garbage_collector(root=checkpoint_root)
    self._clock = clock
    self._task_killer = task_killer
    self._checkpoint_root = checkpoint_root
    self._dropped_tasks = AtomicGauge('dropped_tasks')
    self.metrics.register(self._dropped_tasks)

  def _runner_ckpt(self, task_id):
    """Return the runner checkpoint file for a given task_id."""
    return TaskPath(root=self._checkpoint_root, task_id=task_id).getpath('runner_checkpoint')

  def _terminate_task(self, task_id, kill=True):
    """Terminate a task using the associated task killer. Returns a boolean indicating success."""
    killer = self._task_killer(task_id, self._checkpoint_root)
    self.log('Terminating %s...' % task_id)
    runner_terminate = killer.kill if kill else killer.lose
    try:
      runner_terminate(force=True)
      return True
    except Exception as e:
      self.log('Could not terminate: %s' % e)
      return False

  def partition_tasks(self):
    """Return active/finished tasks as discovered from the checkpoint root."""
    detector = TaskDetector(root=self._checkpoint_root)
    active_tasks = set(t_id for _, t_id in detector.get_task_ids(state='active'))
    finished_tasks = set(t_id for _, t_id in detector.get_task_ids(state='finished'))
    return active_tasks, finished_tasks

  def get_states(self, task_id):
    """Returns the (timestamp, status) tuples of the task or [] if could not replay."""
    statuses = CheckpointDispatcher.iter_statuses(self._runner_ckpt(task_id))
    try:
      return [(state.timestamp_ms / 1000.0, state.state) for state in statuses]
    except CheckpointDispatcher.ErrorRecoveringState:
      return []

  def get_sandbox(self, task_id):
    """Returns the sandbox of the task, or None if it has not yet been initialized."""
    try:
      for update in CheckpointDispatcher.iter_updates(self._runner_ckpt(task_id)):
        if update.runner_header and update.runner_header.sandbox:
          return update.runner_header.sandbox
    except CheckpointDispatcher.ErrorRecoveringState:
      return None

  def maybe_terminate_unknown_task(self, task_id):
    """Terminate a task if we believe the scheduler doesn't know about it.

       It's possible for the scheduler to queue a GC and launch a task afterwards, in which
       case we may see actively running tasks that the scheduler did not report in the
       AdjustRetainedTasks message.

       Returns:
         boolean indicating whether the task was terminated
    """
    states = self.get_states(task_id)
    if states:
      task_start_time, _ = states[0]
      if self._start_time - task_start_time > self.MINIMUM_KILL_AGE.as_(Time.SECONDS):
        return self._terminate_task(task_id)
    return False

  def should_gc_task(self, task_id):
    """Check if a possibly-corrupt task should be locally GCed

      A task should be GCed if its checkpoint stream appears to be corrupted and the kill age
      threshold is exceeded.

       Returns:
         set, containing the task_id if it should be marked for local GC, or empty otherwise
    """
    runner_ckpt = self._runner_ckpt(task_id)
    if not os.path.exists(runner_ckpt):
      return set()
    latest_update = os.path.getmtime(runner_ckpt)
    if self._start_time - latest_update > self.MINIMUM_KILL_AGE.as_(Time.SECONDS):
      self.log('Got corrupt checkpoint file for %s - marking for local GC' % task_id)
      return set([task_id])
    else:
      self.log('Checkpoint file unreadable, but not yet beyond MINIMUM_KILL_AGE threshold')
      return set()

  def reconcile_states(self, driver, retained_tasks):
    """Reconcile states that the scheduler thinks tasks are in vs what they really are in.

        Local    vs   Scheduler  => Action
       ===================================
        ACTIVE         ACTIVE    => no-op
        ACTIVE        STARTING   => no-op
        ACTIVE        TERMINAL   => maybe kill task*
        ACTIVE        !EXISTS    => maybe kill task*
       TERMINAL        ACTIVE    => send actual status**
       TERMINAL       STARTING   => send actual status**
       TERMINAL       TERMINAL   => no-op
       TERMINAL       !EXISTS    => gc locally
       !EXISTS         ACTIVE    => send LOST**
       !EXISTS        STARTING   => no-op
       !EXISTS        TERMINAL   => gc remotely

       * - Only kill if this does not appear to be a race condition.
       ** - These appear to have no effect

       Side effecting operations:
         ACTIVE   | (TERMINAL / !EXISTS) => maybe kill
         TERMINAL | !EXISTS              => delete
         !EXISTS  | TERMINAL             => delete

      Returns tuple of (local_gc, remote_gc, updates), where:
        local_gc - set of task_ids to be GCed locally
        remote_gc - set of task_ids to be deleted on the scheduler
        updates - dictionary of updates sent to the scheduler (task_id: ScheduleStatus)
    """
    def partition(rt):
      active, starting, finished = set(), set(), set()
      for task_id, schedule_status in rt.items():
        if schedule_status in TERMINAL_STATES:
          finished.add(task_id)
        elif (schedule_status == ScheduleStatus.STARTING or
              schedule_status == ScheduleStatus.ASSIGNED):
          starting.add(task_id)
        else:
          active.add(task_id)
      return active, starting, finished

    local_active, local_finished = self.partition_tasks()
    sched_active, sched_starting, sched_finished = partition(retained_tasks)
    local_task_ids = local_active | local_finished
    sched_task_ids = sched_active | sched_starting | sched_finished
    all_task_ids = local_task_ids | sched_task_ids

    self.log('Told to retain the following task ids:')
    for task_id, schedule_status in retained_tasks.items():
      self.log('  => %s as %s' %
          (task_id, ScheduleStatus._VALUES_TO_NAMES.get(schedule_status, 'UNKNOWN')))

    self.log('Local active tasks:')
    for task_id in local_active:
      self.log('  => %s' % task_id)

    self.log('Local finished tasks:')
    for task_id in local_finished:
      self.log('  => %s' % task_id)

    local_gc, remote_gc = set(), set()
    updates = {}

    for task_id in all_task_ids:
      if task_id in local_active and task_id not in (sched_active | sched_starting):
        self.log('Inspecting task %s for termination.' % task_id)
        if not self.maybe_terminate_unknown_task(task_id):
          local_gc.update(self.should_gc_task(task_id))
      if task_id in local_finished and task_id not in sched_task_ids:
        self.log('Queueing task %s for local deletion.' % task_id)
        local_gc.add(task_id)
      if task_id in local_finished and task_id in (sched_active | sched_starting):
        self.log('Task %s finished but scheduler thinks active/starting.' % task_id)
        states = self.get_states(task_id)
        if states:
          _, last_state = states[-1]
          updates[task_id] = THERMOS_TO_TWITTER_STATES.get(
              last_state,
              ScheduleStatus.SANDBOX_DELETED)
          self.send_update(
              driver,
              task_id,
              THERMOS_TO_MESOS_STATES.get(last_state, mesos_pb2.TASK_LOST),
              'Task finish detected by GC executor.')
        else:
          local_gc.update(self.should_gc_task(task_id))
      if task_id in sched_finished and task_id not in local_task_ids:
        self.log('Queueing task %s for remote deletion.' % task_id)
        remote_gc.add(task_id)
      if task_id not in local_task_ids and task_id in sched_active:
        self.log('Know nothing about task %s, telling scheduler of LOSS.' % task_id)
        updates[task_id] = ScheduleStatus.LOST
        self.send_update(
            driver, task_id, mesos_pb2.TASK_LOST, 'GC executor found no trace of task.')
      if task_id not in local_task_ids and task_id in sched_starting:
        self.log('Know nothing about task %s, but scheduler says STARTING - passing' % task_id)

    return local_gc, remote_gc, updates

  def clean_orphans(self, driver):
    """Inspect checkpoints for trees that have been kill -9'ed but not properly cleaned up."""
    self.log('Checking for orphaned tasks')
    active_tasks, _ = self.partition_tasks()
    updates = {}

    inspector = CheckpointInspector(self._checkpoint_root)

    def is_our_process(process, uid, timestamp):
      if process.uids.real != uid:
        return False
      estimated_start_time = self._clock.time() - process.create_time
      return abs(timestamp - estimated_start_time) < self.MAX_PID_TIME_DRIFT.as_(Time.SECONDS)

    for task_id in active_tasks:
      self.log('Inspecting running task: %s' % task_id)
      inspection = inspector.inspect(task_id)
      if not inspection:
        self.log('  - Error inspecting task runner')
        continue
      latest_runner = inspection.runner_processes[-1]
      # Assume that it has not yet started?
      if not latest_runner:
        self.log('  - Task has no registered runners.')
        continue
      runner_pid, runner_uid, timestamp_ms = latest_runner
      try:
        runner_process = psutil.Process(runner_pid)
        if is_our_process(runner_process, runner_uid, timestamp_ms / 1000.0):
          self.log('  - Runner appears healthy.')
          continue
      except psutil.NoSuchProcess:
        # Runner is dead
        pass
      except psutil.Error as err:
        self.log('  - Error sampling runner process [pid=%s]: %s' % (runner_pid, err))
        continue
      try:
        latest_update = os.path.getmtime(self._runner_ckpt(task_id))
      except (IOError, OSError) as err:
        self.log('  - Error accessing runner ckpt: %s' % err)
        continue
      if self._clock.time() - latest_update < self.MAX_CHECKPOINT_TIME_DRIFT.as_(Time.SECONDS):
        self.log('  - Runner is dead but under LOST threshold.')
        continue
      self.log('  - Runner is dead but beyond LOST threshold: %.1fs' % (
          self._clock.time() - latest_update))
      if self._terminate_task(task_id, kill=False):
        updates[task_id] = ScheduleStatus.LOST
        self.send_update(
            driver, task_id, mesos_pb2.TASK_LOST, 'GC executor detected failed task runner.')

    return updates

  def _erase_sandbox(self, task_id):
    # TODO(wickman) Only mesos should be in the business of garbage collecting sandboxes.
    header_sandbox = self.get_sandbox(task_id)
    directory_sandbox = DirectorySandbox(header_sandbox) if header_sandbox else None
    if directory_sandbox and directory_sandbox.exists():
      self.log('Destroying DirectorySandbox for %s' % task_id)
      try:
        directory_sandbox.destroy()
      except DirectorySandbox.Error as e:
        self.log('Failed to destroy DirectorySandbox: %s' % e)
    else:
      self.log('Found no sandboxes for %s' % task_id)

  def _gc(self, task_id):
    """Erase the sandbox, logs and metadata of the given task."""
    self.log('Erasing sandbox for %s' % task_id)
    self._erase_sandbox(task_id)
    self.log('Erasing logs for %s' % task_id)
    self._collector.erase_logs(task_id)
    self.log('Erasing metadata for %s' % task_id)
    self._collector.erase_metadata(task_id)

  def garbage_collect(self, force_delete=frozenset()):
    """Garbage collect tasks on the system no longer active or in the supplied force_delete.

    Return a set of task_ids representing the tasks that were garbage collected.
    """
    active_tasks, finished_tasks = self.partition_tasks()
    retained_executors = set(iter(self.linked_executors))
    self.log('Executor sandboxes retained by Mesos:')
    if retained_executors:
      for r_e in sorted(retained_executors):
        self.log('  %s' % r_e)
    else:
      self.log('  None')
    for task_id in (active_tasks - retained_executors):
      self.log('ERROR: Active task %s had its executor sandbox pulled.' % task_id)
    gc_tasks = (finished_tasks - retained_executors) | force_delete
    for gc_task in gc_tasks:
      self._gc(gc_task)
    return gc_tasks

  @property
  def linked_executors(self):
    """Generator yielding the executor sandboxes detected on the system."""
    thermos_executor_prefix = 'thermos-'
    for executor in self._detector:
      # It's possible for just the 'latest' symlink to be present but no run directories.
      # This indicates that the task has been fully garbage collected.
      if executor.executor_id.startswith(thermos_executor_prefix) and executor.run != 'latest':
        yield executor.executor_id[len(thermos_executor_prefix):]

  def _run_gc(self, task, retain_tasks, retain_start):
    """
      Reconcile the set of tasks to retain (provided by the scheduler) with the current state of
      executors on this system. Garbage collect tasks/executors as appropriate.

      Not re-entrant! Previous executions must complete (and clear self._task_id) before this can be
      invoked.

      Potentially blocking (e.g. on I/O) in self.garbage_collect()

      Args:
        task: TaskInfo provided by the slave
        retain_tasks: mapping of task_id => ScheduleStatus, describing what the scheduler thinks is
                      running on this system
        retain_start: the time at which the retain_tasks message is effective -- this means that
                      tasks started after the retain_tasks message is effective are skipped
                      until future GC runs.
    """
    task_id = task.task_id.value
    if self._task_id is not None:
      raise RuntimeError('_run_gc() called [task_id=%s], but already running [task_id=%s]'
                         % (task_id, self._task_id))
    self._task_id = task_id
    self.log('Launching garbage collection [task_id=%s]' % task_id)
    self._start_time = retain_start
    local_gc, remote_gc, _ = self.reconcile_states(self._driver, retain_tasks)
    deleted_tasks = set(retain_tasks).intersection(self.garbage_collect(local_gc)) | remote_gc
    if deleted_tasks:
      self._driver.sendFrameworkMessage(thrift_serialize(
          SchedulerMessage(deletedTasks=DeletedTasks(taskIds=deleted_tasks))))
    self.send_update(
        self._driver, task.task_id.value, mesos_pb2.TASK_FINISHED, 'Garbage collection finished.')
    self.log('Garbage collection complete [task_id=%s]' % task_id)
    self._task_id = self._start_time = None

  def run(self):
    """Main GC executor event loop.

      Periodically perform state reconciliation with the set of tasks provided
      by the slave, and garbage collect orphaned tasks on the system.
    """
    run_start = self._clock.time()
    last_gc_run = self._clock.time()

    def should_terminate():
      now = self._clock.time()
      if now > run_start + self.MAXIMUM_EXECUTOR_LIFETIME.as_(Time.SECONDS):
        return True
      if now > last_gc_run + self.MAXIMUM_EXECUTOR_WAIT.as_(Time.SECONDS):
        return True
      return self._stop_event.is_set()

    while not should_terminate():
      try:
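        # popitem(0) pops the oldest (first-queued) entry; raises KeyError when the queue is empty.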
        _, (task, retain_tasks, retain_start) = self._gc_task_queue.popitem(0)
        last_gc_run = retain_start
        self._run_gc(task, retain_tasks, retain_start)
      except KeyError:  # no enqueued GC tasks
        pass
      if self._driver is not None:
        self.clean_orphans(self._driver)
      self._stop_event.wait(self.POLL_WAIT.as_(Time.SECONDS))

    # shutdown
    if self._driver is not None:
      try:
        prev_task_id, _ = self._gc_task_queue.popitem(0)
      except KeyError:  # no enqueued GC tasks
        pass
      else:
        self.send_update(self._driver, prev_task_id, mesos_pb2.TASK_FINISHED,
                         'Garbage collection skipped - GC executor shutting down')
        # TODO(jon) Remove this once external MESOS-243 is resolved.
        self.log('Sleeping briefly to mitigate https://issues.apache.org/jira/browse/MESOS-243')
        self._clock.sleep(self.PERSISTENCE_WAIT.as_(Time.SECONDS))

      self._driver.stop()

  """ Mesos Executor API methods follow """

  def launchTask(self, driver, task):
    """Queue a new garbage collection run, and drop any currently-enqueued runs."""
    if self._slave_id is None:
      self._slave_id = task.slave_id.value
    task_id = task.task_id.value
    self.log('launchTask() got task_id: %s' % task_id)
    if task_id == self._task_id:
      self.log('=> GC with task_id %s currently running - ignoring' % task_id)
      return
    elif task_id in self._gc_task_queue:
      self.log('=> Already have task_id %s queued - ignoring' % task_id)
      return
    try:
      art = thrift_deserialize(AdjustRetainedTasks(), task.data)
    except Exception as err:
      self.log('Error deserializing task: %s' % err)
      self.send_update(
          self._driver, task_id, mesos_pb2.TASK_FAILED, 'Deserialization of GC task failed')
      return
    try:
      prev_task_id, _ = self._gc_task_queue.popitem(0)
    except KeyError:  # no enqueued GC tasks - reset counter
      self._dropped_tasks.write(0)
    else:
      self.log('=> Dropping previously queued GC with task_id %s' % prev_task_id)
      self._dropped_tasks.increment()
      self.log('=> Updating scheduler')
      self.send_update(self._driver, prev_task_id, mesos_pb2.TASK_FINISHED,
                       'Garbage collection skipped - GC executor received another task')
    self.log('=> Adding %s to GC queue' % task_id)
    self._gc_task_queue[task_id] = (task, art.retainedTasks, self._clock.time())

  def killTask(self, driver, task_id):
    """Remove the specified task from the queue, if it's not yet run. Otherwise, no-op."""
    self.log('killTask() got task_id: %s' % task_id)
    task = self._gc_task_queue.pop(task_id, None)
    if task is not None:
      self.log('=> Removed %s from queued GC tasks' % task_id)
    elif task_id == self._task_id:
      self.log('=> GC with task_id %s currently running - ignoring' % task_id)
    else:
      self.log('=> Unknown task_id %s - ignoring' % task_id)

  def shutdown(self, driver):
    """Trigger the Executor to shut down as soon as the current GC run is finished."""
    self.log('shutdown() called - setting stop event')
    self._stop_event.set()
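
The reconcile_states() docstring above encodes a decision table. The following standalone sketch
(illustrative only, not part of the executor) re-expresses that table as a plain function so the
cases are easy to check:

def reconcile_action(local_state, sched_state):
    """local_state/sched_state are 'ACTIVE', 'STARTING', 'TERMINAL' or None (absent)."""
    if local_state == 'ACTIVE':
        if sched_state in ('ACTIVE', 'STARTING'):
            return 'no-op'
        return 'maybe kill task'        # scheduler says TERMINAL or has no record
    if local_state == 'TERMINAL':
        if sched_state in ('ACTIVE', 'STARTING'):
            return 'send actual status'
        if sched_state == 'TERMINAL':
            return 'no-op'
        return 'gc locally'             # scheduler has no record of the task
    # local_state is None: no local trace of the task
    if sched_state == 'ACTIVE':
        return 'send LOST'
    if sched_state == 'STARTING':
        return 'no-op'
    return 'gc remotely'                # scheduler still retains a terminal task we never saw

assert reconcile_action('TERMINAL', None) == 'gc locally'
assert reconcile_action(None, 'ACTIVE') == 'send LOST'
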
Beispiel #40
0
    def attempt(context, phases, timer=None):
        """
      Attempts to reach the goals for the supplied phases, optionally recording phase timings and
      then logging then when all specified phases have completed.
    """

        start = timer.now() if timer else None
        executed = OrderedDict()

        # I'd rather do this in a finally block below, but some goals os.fork and each of these cause
        # finally to run, printing goal timings multiple times instead of once at the end.
        def print_timings():
            if timer:
                timer.log('Timing report')
                timer.log('=============')
                for phase, timings in executed.items():
                    phase_time = None
                    for goal, times in timings.items():
                        if len(times) > 1:
                            timer.log(
                                '[%(phase)s:%(goal)s(%(numsteps)d)] %(timings)s -> %(total).3fs' % {
                                    'phase': phase,
                                    'goal': goal,
                                    'numsteps': len(times),
                                    'timings': ','.join('%.3fs' % time for time in times),
                                    'total': sum(times)
                                })
                        else:
                            timer.log('[%(phase)s:%(goal)s] %(total).3fs' % {
                                'phase': phase,
                                'goal': goal,
                                'total': sum(times)
                            })
                        if not phase_time:
                            phase_time = 0
                        phase_time += sum(times)
                    if len(timings) > 1:
                        timer.log('[%(phase)s] total: %(total).3fs' % {
                            'phase': phase,
                            'total': phase_time
                        })
                elapsed = timer.now() - start
                timer.log('total: %.3fs' % elapsed)

        try:
            # Prepare tasks roots to leaves and allow for goals introducing new goals in existing phases.
            tasks_by_goal = {}
            expanded = OrderedSet()
            prepared = set()
            round = 0
            while True:
                goals = list(Phase.execution_order(phases))
                if set(goals) == prepared:
                    break
                else:
                    round += 1
                    context.log.debug('Preparing goals in round %d' % round)
                    for goal in reversed(goals):
                        if goal not in prepared:
                            phase = Phase.of(goal)
                            expanded.add(phase)
                            context.log.debug('preparing: %s:%s' %
                                              (phase, goal.name))
                            prepared.add(goal)
                            task = goal.prepare(context)
                            tasks_by_goal[goal] = task

            # Execute phases leaves to roots
            context.log.debug('Executing goals in phases %s' %
                              ' -> '.join(map(str, reversed(expanded))))
            for phase in phases:
                Group.execute(phase,
                              tasks_by_goal,
                              context,
                              executed,
                              timer=timer)

            print_timings()
            return 0
        except (TaskError, GoalError) as e:
            message = '%s' % e
            if message:
                print('\nFAILURE: %s\n' % e)
            else:
                print('\nFAILURE\n')
            print_timings()
            return 1
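
A small, self-contained sketch (with hypothetical sample data) of the timing report that
print_timings() above produces: `executed` maps phase -> goal -> list of step durations, and the
report shows per-goal totals plus a per-phase total whenever a phase ran more than one goal.

executed = {
    'gen':     {'thrift': [0.120, 0.030], 'protoc': [0.500]},
    'compile': {'jvm': [2.250]},
}

for phase, timings in executed.items():
    phase_time = 0.0
    for goal, times in timings.items():
        if len(times) > 1:
            print('[%s:%s(%d)] %s -> %.3fs' % (
                phase, goal, len(times), ','.join('%.3fs' % t for t in times), sum(times)))
        else:
            print('[%s:%s] %.3fs' % (phase, goal, sum(times)))
        phase_time += sum(times)
    if len(timings) > 1:
        print('[%s] total: %.3fs' % (phase, phase_time))
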
Beispiel #41
0
class TestBuildLocalPythonDistributions(BuildLocalPythonDistributionsTestBase):

  dist_specs = OrderedDict([

    ('src/python/dist:universal_dist', {
      'key': 'universal',
      'target_type': PythonDistribution,
      'sources': ['__init__.py', 'setup.py'],
      'filemap': {
        '__init__.py': '',
        'setup.py': """\
from setuptools import find_packages, setup
setup(
  name='universal_dist',
  version='0.0.0',
  packages=find_packages()
)
        """,
      },
    }),

    ('3rdparty/python:pycountry', {
      'key': 'pycountry',
      'target_type': PythonRequirementLibrary,
      'requirements': [
        PythonRequirement('pycountry==18.5.20'),
      ],
    }),

    ('src/python/setup_requires:setup_requires', {
      'key': 'setup_requires',
      'target_type': PythonDistribution,
      'setup_requires': [
        '3rdparty/python:pycountry',
      ],
      'sources': ['__init__.py', 'setup.py'],
      'filemap': {
        '__init__.py': '',
        'setup.py': """\
from setuptools import find_packages, setup
import pycountry

us_country_string = pycountry.countries.get(alpha_2='US').name.replace(' ', '_').lower()

setup(
  name='setup_requires_dist_{}'.format(us_country_string),
  version='0.0.0',
  packages=find_packages(),
)
        """,
      },
    }),

    ('src/python/install_requires:install_requires', {
      'key': 'install_requires',
      'target_type': PythonDistribution,
      'sources': ['__init__.py', 'setup.py'],
      'filemap': {
        '__init__.py': '',
        'setup.py': """\
from setuptools import setup

setup(
  name='install_requires_dist',
  version='0.0.0',
  install_requires=['pycountry==17.1.2'],
)
        """,
      },
    }),

    ('src/python/install_requires:conflict', {
      'key': 'install_requires_conflict',
      'target_type': PythonLibrary,
      'dependencies': [
        '3rdparty/python:pycountry',
        'src/python/install_requires:install_requires',
      ],
    }),
  ])

  def test_create_distribution(self):
    universal_dist = self.target_dict['universal']
    self._assert_dist_and_wheel_identity(
      expected_name='universal_dist',
      expected_version='0.0.0',
      expected_platform=self.ExpectedPlatformType.any,
      dist_target=universal_dist,
    )

  def test_python_dist_setup_requires(self):
    setup_requires_dist = self.target_dict['setup_requires']
    self._assert_dist_and_wheel_identity(
      expected_name='setup_requires_dist_united_states',
      expected_version='0.0.0',
      expected_platform=self.ExpectedPlatformType.any,
      dist_target=setup_requires_dist,
      extra_targets=[self.target_dict['pycountry']],
    )

  def test_install_requires(self):
    install_requires_dist = self.target_dict['install_requires']
    self._assert_dist_and_wheel_identity(
      expected_name='install_requires_dist',
      expected_version='0.0.0',
      expected_platform=self.ExpectedPlatformType.any,
      dist_target=install_requires_dist,
    )

  def test_install_requires_conflict(self):
    install_requires_dist = self.target_dict['install_requires']
    pycountry_req_lib = self.target_dict['pycountry']
    conflicting_lib = self.target_dict['install_requires_conflict']

    with self.assertRaisesRegexp(
        pex.resolver.Unsatisfiable,
        re.escape('Could not satisfy all requirements for pycountry==18.5.20:')):
      self._create_distribution_synthetic_target(
        install_requires_dist,
        extra_targets=[pycountry_req_lib, conflicting_lib])
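
The conflict test above expects resolution to fail because two exact pins on pycountry
(18.5.20 from the requirement library and 17.1.2 from install_requires) cannot both hold.
A tiny illustration of why, using the `packaging` library rather than pex's resolver (a
sketch, not the code under test):

from packaging.specifiers import SpecifierSet

print(SpecifierSet('==18.5.20').contains('17.1.2'))  # False
print(SpecifierSet('==17.1.2').contains('18.5.20'))  # False
# No single version satisfies both pins, so a combined resolve raises Unsatisfiable.
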
Beispiel #42
0
class BuildGraph(object):
  """A directed acyclic graph of Targets and dependencies. Not necessarily connected.
  """

  class TransitiveLookupError(AddressLookupError):
    """Used to append the current node to the error message from an AddressLookupError """

  def __init__(self, address_mapper, run_tracker=None):
    self._address_mapper = address_mapper
    self.run_tracker = run_tracker
    self.reset()

  def reset(self):
    """Clear out the state of the BuildGraph, in particular Target mappings and dependencies."""
    self._addresses_already_closed = set()
    self._target_by_address = OrderedDict()
    self._target_dependencies_by_address = defaultdict(OrderedSet)
    self._target_dependees_by_address = defaultdict(set)
    self._derived_from_by_derivative_address = {}

  def contains_address(self, address):
    return address in self._target_by_address

  def get_target_from_spec(self, spec, relative_to=''):
    """Converts `spec` into a SyntheticAddress and returns the result of `get_target`"""
    return self.get_target(SyntheticAddress.parse(spec, relative_to=relative_to))

  def get_target(self, address):
    """Returns the Target at `address` if it has been injected into the BuildGraph, otherwise None.
    """
    return self._target_by_address.get(address, None)

  def dependencies_of(self, address):
    """Returns the dependencies of the Target at `address`.

    This method asserts that the address given is actually in the BuildGraph.
    """
    assert address in self._target_by_address, (
      'Cannot retrieve dependencies of {address} because it is not in the BuildGraph.'
      .format(address=address)
    )
    return self._target_dependencies_by_address[address]

  def dependents_of(self, address):
    """Returns the Targets which depend on the target at `address`.

    This method asserts that the address given is actually in the BuildGraph.
    """
    assert address in self._target_by_address, (
      'Cannot retrieve dependents of {address} because it is not in the BuildGraph.'
      .format(address=address)
    )
    return self._target_dependees_by_address[address]

  def get_derived_from(self, address):
    """Get the target the specified target was derived from.

    If a Target was injected programmatically, e.g. from codegen, this allows us to trace its
    ancestry.  If a Target is not derived, default to returning itself.
    """
    parent_address = self._derived_from_by_derivative_address.get(address, address)
    return self.get_target(parent_address)

  def get_concrete_derived_from(self, address):
    """Get the concrete target the specified target was (directly or indirectly) derived from.

    The returned target is guaranteed to not have been derived from any other target.
    """
    current_address = address
    next_address = self._derived_from_by_derivative_address.get(current_address, current_address)
    while next_address != current_address:
      current_address = next_address
      next_address = self._derived_from_by_derivative_address.get(current_address, current_address)
    return self.get_target(current_address)

  def inject_target(self, target, dependencies=None, derived_from=None):
    """Injects a fully realized Target into the BuildGraph.

    :param Target target: The Target to inject.
    :param list<Address> dependencies: The Target addresses that `target` depends on.
    :param Target derived_from: The Target that `target` was derived from, usually as a result
      of codegen.
    """

    dependencies = dependencies or frozenset()
    address = target.address

    if address in self._target_by_address:
      raise ValueError('A Target {existing_target} already exists in the BuildGraph at address'
                       ' {address}.  Failed to insert {target}.'
                       .format(existing_target=self._target_by_address[address],
                               address=address,
                               target=target))

    if derived_from:
      if not self.contains_address(derived_from.address):
        raise ValueError('Attempted to inject synthetic {target} derived from {derived_from}'
                         ' into the BuildGraph, but {derived_from} was not in the BuildGraph.'
                         ' Synthetic Targets must be derived from no Target (None) or from a'
                         ' Target already in the BuildGraph.'
                         .format(target=target,
                                 derived_from=derived_from))
      self._derived_from_by_derivative_address[target.address] = derived_from.address

    self._target_by_address[address] = target

    for dependency_address in dependencies:
      self.inject_dependency(dependent=address, dependency=dependency_address)

  def inject_dependency(self, dependent, dependency):
    """Injects a dependency from `dependent` onto `dependency`.

    It is an error to inject a dependency if the dependent doesn't already exist, but the reverse
    is not an error.

    :param Address dependent: The (already injected) address of a Target to which `dependency`
      is being added.
    :param Address dependency: The dependency to be injected.
    """
    if dependent not in self._target_by_address:
      raise ValueError('Cannot inject dependency from {dependent} on {dependency} because the'
                       ' dependent is not in the BuildGraph.'
                       .format(dependent=dependent, dependency=dependency))

    # TODO(pl): Unfortunately this is an unhelpful time to error due to a cycle.  Instead, we warn
    # and allow the cycle to appear.  It is the caller's responsibility to call sort_targets on the
    # entire graph to generate a friendlier CycleException that actually prints the cycle.
    # Alternatively, we could call sort_targets after every inject_dependency/inject_target, but
    # that could have nasty performance implications.  Alternative 2 would be to have an internal
    # data structure of the topologically sorted graph which would have acceptable amortized
    # performance for inserting new nodes, and also cycle detection on each insert.

    if dependency not in self._target_by_address:
      logger.warning('Injecting dependency from {dependent} on {dependency}, but the dependency'
                     ' is not in the BuildGraph.  This probably indicates a dependency cycle, but'
                     ' it is not an error until sort_targets is called on a subgraph containing'
                     ' the cycle.'
                     .format(dependent=dependent, dependency=dependency))

    if dependency in self.dependencies_of(dependent):
      logger.warn('{dependent} already depends on {dependency}'
                  .format(dependent=dependent, dependency=dependency))
    else:
      self._target_dependencies_by_address[dependent].add(dependency)
      self._target_dependees_by_address[dependency].add(dependent)

  def targets(self, predicate=None):
    """Returns all the targets in the graph in no particular order.

    :param predicate: A target predicate that will be used to filter the targets returned.

    :return: a list of the targets matching the predicate.
    """
    return filter(predicate, self._target_by_address.values())

  def sorted_targets(self):
    """:return: targets ordered from most dependent to least."""
    return sort_targets(self._target_by_address.values())

  def walk_transitive_dependency_graph(self, addresses, work, predicate=None):
    """Given a work function, walks the transitive dependency closure of `addresses`.

    :param list<Address> addresses: The closure of `addresses` will be walked.
    :param function work: The function that will be called on every target in the closure using
      inorder traversal order.
    :param function predicate: If this parameter is not given, no Targets will be filtered
      out of the closure.  If it is given, any Target which fails the predicate will not be
      walked, nor will its dependencies.  Thus predicate effectively trims out any subgraph
      that would only be reachable through Targets that fail the predicate.
    """
    walked = set()
    def _walk_rec(address):
      if address not in walked:
        walked.add(address)
        target = self._target_by_address[address]
        if not predicate or predicate(target):
          work(target)
          for dep_address in self._target_dependencies_by_address[address]:
            _walk_rec(dep_address)
    for address in addresses:
      _walk_rec(address)

  def walk_transitive_dependee_graph(self, addresses, work, predicate=None):
    """Identical to `walk_transitive_dependency_graph`, but walks dependees inorder traversal order.

    This is identical to reversing the direction of every arrow in the DAG, then calling
    `walk_transitive_dependency_graph`.
    """
    walked = set()
    def _walk_rec(address):
      if address not in walked:
        walked.add(address)
        target = self._target_by_address[address]
        if not predicate or predicate(target):
          work(target)
          for dep_address in self._target_dependees_by_address[address]:
            _walk_rec(dep_address)
    for address in addresses:
      _walk_rec(address)

  def transitive_dependees_of_addresses(self, addresses, predicate=None):
    """Returns all transitive dependees of `address`.

    Note that this uses `walk_transitive_dependee_graph` and the predicate is passed through,
    hence it trims graphs rather than just filtering out Targets that do not match the predicate.
    See `walk_transitive_dependee_graph` for more detail on `predicate`.

    :param list<Address> addresses: The root addresses to transitively close over.
    :param function predicate: The predicate passed through to `walk_transitive_dependee_graph`.
    """
    ret = OrderedSet()
    self.walk_transitive_dependee_graph(addresses, ret.add, predicate=predicate)
    return ret

  def transitive_subgraph_of_addresses(self, addresses, predicate=None):
    """Returns all transitive dependencies of `address`.

    Note that this uses `walk_transitive_dependency_graph` and the predicate is passed through,
    hence it trims graphs rather than just filtering out Targets that do not match the predicate.
    See `walk_transitive_dependency_graph` for more detail on `predicate`.

    :param list<Address> addresses: The root addresses to transitively close over.
    :param function predicate: The predicate passed through to
      `walk_transitive_dependency_graph`.
    """
    ret = OrderedSet()
    self.walk_transitive_dependency_graph(addresses, ret.add, predicate=predicate)
    return ret

  def inject_synthetic_target(self,
                              address,
                              target_type,
                              dependencies=None,
                              derived_from=None,
                              **kwargs):
    """Constructs and injects Target at `address` with optional `dependencies` and `derived_from`.

    This method is useful especially for codegen, where a "derived" Target is injected
    programmatically rather than read in from a BUILD file.

    :param Address address: The address of the new Target.  Must not already be in the BuildGraph.
    :param type target_type: The class of the Target to be constructed.
    :param list<Address> dependencies: The dependencies of this Target, usually inferred or copied
      from the `derived_from`.
    :param Target derived_from: The Target this Target will derive from.
    """
    if self.contains_address(address):
      raise ValueError('Attempted to inject synthetic {target_type} derived from {derived_from}'
                       ' into the BuildGraph with address {address}, but there is already a Target'
                       ' {existing_target} with that address'
                       .format(target_type=target_type,
                               derived_from=derived_from,
                               address=address,
                               existing_target=self.get_target(address)))

    target = target_type(name=address.target_name,
                         address=address,
                         build_graph=self,
                         **kwargs)
    self.inject_target(target, dependencies=dependencies, derived_from=derived_from)

  def inject_address(self, address):
    """Delegates to an internal AddressMapper to resolve, construct, and inject a Target.

    :param Address address: The address to inject.  Must be resolvable by `self._address_mapper`.
    """
    if not self.contains_address(address):
      target_addressable = self._address_mapper.resolve(address)
      target = self.target_addressable_to_target(address, target_addressable)
      self.inject_target(target)

  def inject_address_closure(self, address):
    """Recursively calls `inject_address` through the transitive closure of dependencies."""

    if address in self._addresses_already_closed:
      return

    mapper = self._address_mapper

    target_addressable = mapper.resolve(address)

    self._addresses_already_closed.add(address)
    try:
      dep_addresses = list(mapper.specs_to_addresses(target_addressable.dependency_specs,
                                                     relative_to=address.spec_path))
      for dep_address in dep_addresses:
        self.inject_address_closure(dep_address)

      if not self.contains_address(address):
        target = self.target_addressable_to_target(address, target_addressable)
        self.inject_target(target, dependencies=dep_addresses)
      else:
        target = self.get_target(address)

      for traversable_spec in target.traversable_dependency_specs:
        self.inject_spec_closure(spec=traversable_spec, relative_to=address.spec_path)
        traversable_spec_target = self.get_target_from_spec(traversable_spec,
                                                            relative_to=address.spec_path)
        if traversable_spec_target not in target.dependencies:
          self.inject_dependency(dependent=target.address,
                                 dependency=traversable_spec_target.address)
          target.mark_transitive_invalidation_hash_dirty()

      for traversable_spec in target.traversable_specs:
        self.inject_spec_closure(spec=traversable_spec, relative_to=address.spec_path)
        target.mark_transitive_invalidation_hash_dirty()

    except AddressLookupError as e:
      raise self.TransitiveLookupError("{message}\n  referenced from {spec}"
                                       .format(message=e, spec=address.spec))


  def inject_spec_closure(self, spec, relative_to=''):
    """Constructs a SyntheticAddress from `spec` and calls `inject_address_closure`.

    :param string spec: A Target spec
    :param string relative_to: The spec_path of the BUILD file this spec was read from.
    """
    address = self._address_mapper.spec_to_address(spec, relative_to=relative_to)
    self.inject_address_closure(address)

  def target_addressable_to_target(self, address, addressable):
    """Realizes a TargetAddressable into a Target at `address`.

    :param TargetAddressable addressable:
    :param Address address:
    """
    try:
      target = addressable.get_target_type()(build_graph=self,
                                             address=address,
                                             **addressable.kwargs)
      target.with_description(addressable.description)
      return target
    except Exception:
      traceback.print_exc()
      logger.exception('Failed to instantiate Target with type {target_type} with name "{name}"'
                       ' at address {address}'
                       .format(target_type=addressable.get_target_type(),
                               name=addressable.name,
                               address=address))
      raise
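
A minimal sketch (toy data, not pants code) of the preorder walk implemented by
walk_transitive_dependency_graph() above: visit each address once, apply `work`, and recurse into
its dependencies unless the predicate rejects the target.

deps = {
    'a': ['b', 'c'],
    'b': ['c'],
    'c': [],
}

def walk(addresses, work, predicate=None):
    walked = set()
    def _walk_rec(address):
        if address in walked:
            return
        walked.add(address)
        if predicate is None or predicate(address):
            work(address)
            for dep in deps[address]:
                _walk_rec(dep)
    for address in addresses:
        _walk_rec(address)

visited = []
walk(['a'], visited.append)
print(visited)  # -> ['a', 'b', 'c'] (preorder; each node visited exactly once)
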
Beispiel #43
0
class BuildGraph(object):
  """A directed acyclic graph of Targets and dependencies. Not necessarily connected.
  """

  class DuplicateAddressError(AddressLookupError):
    """The same address appears multiple times in a dependency list"""

  class TransitiveLookupError(AddressLookupError):
    """Used to append the current node to the error message from an AddressLookupError """

  def __init__(self, address_mapper, run_tracker=None):
    self._address_mapper = address_mapper
    self.run_tracker = run_tracker
    self.reset()

  def reset(self):
    """Clear out the state of the BuildGraph, in particular Target mappings and dependencies."""
    self._addresses_already_closed = set()
    self._target_by_address = OrderedDict()
    self._target_dependencies_by_address = defaultdict(OrderedSet)
    self._target_dependees_by_address = defaultdict(set)
    self._derived_from_by_derivative_address = {}

  def contains_address(self, address):
    return address in self._target_by_address

  def get_target_from_spec(self, spec, relative_to=''):
    """Converts `spec` into a SyntheticAddress and returns the result of `get_target`"""
    return self.get_target(SyntheticAddress.parse(spec, relative_to=relative_to))

  def get_target(self, address):
    """Returns the Target at `address` if it has been injected into the BuildGraph, otherwise None.
    """
    return self._target_by_address.get(address, None)

  def dependencies_of(self, address):
    """Returns the dependencies of the Target at `address`.

    This method asserts that the address given is actually in the BuildGraph.
    """
    assert address in self._target_by_address, (
      'Cannot retrieve dependencies of {address} because it is not in the BuildGraph.'
      .format(address=address)
    )
    return self._target_dependencies_by_address[address]

  def dependents_of(self, address):
    """Returns the Targets which depend on the target at `address`.

    This method asserts that the address given is actually in the BuildGraph.
    """
    assert address in self._target_by_address, (
      'Cannot retrieve dependents of {address} because it is not in the BuildGraph.'
      .format(address=address)
    )
    return self._target_dependees_by_address[address]

  def get_derived_from(self, address):
    """Get the target the specified target was derived from.

    If a Target was injected programmatically, e.g. from codegen, this allows us to trace its
    ancestry.  If a Target is not derived, default to returning itself.
    """
    parent_address = self._derived_from_by_derivative_address.get(address, address)
    return self.get_target(parent_address)

  def get_concrete_derived_from(self, address):
    """Get the concrete target the specified target was (directly or indirectly) derived from.

    The returned target is guaranteed to not have been derived from any other target.
    """
    current_address = address
    next_address = self._derived_from_by_derivative_address.get(current_address, current_address)
    while next_address != current_address:
      current_address = next_address
      next_address = self._derived_from_by_derivative_address.get(current_address, current_address)
    return self.get_target(current_address)

  def inject_target(self, target, dependencies=None, derived_from=None):
    """Injects a fully realized Target into the BuildGraph.

    :param Target target: The Target to inject.
    :param list<Address> dependencies: The Target addresses that `target` depends on.
    :param Target derived_from: The Target that `target` was derived from, usually as a result
      of codegen.
    """

    dependencies = dependencies or frozenset()
    address = target.address

    if address in self._target_by_address:
      raise ValueError('A Target {existing_target} already exists in the BuildGraph at address'
                       ' {address}.  Failed to insert {target}.'
                       .format(existing_target=self._target_by_address[address],
                               address=address,
                               target=target))

    if derived_from:
      if not self.contains_address(derived_from.address):
        raise ValueError('Attempted to inject synthetic {target} derived from {derived_from}'
                         ' into the BuildGraph, but {derived_from} was not in the BuildGraph.'
                         ' Synthetic Targets must be derived from no Target (None) or from a'
                         ' Target already in the BuildGraph.'
                         .format(target=target,
                                 derived_from=derived_from))
      self._derived_from_by_derivative_address[target.address] = derived_from.address

    self._target_by_address[address] = target

    for dependency_address in dependencies:
      self.inject_dependency(dependent=address, dependency=dependency_address)

  def inject_dependency(self, dependent, dependency):
    """Injects a dependency from `dependent` onto `dependency`.

    It is an error to inject a dependency if the dependent doesn't already exist, but the reverse
    is not an error.

    :param Address dependent: The (already injected) address of a Target to which `dependency`
      is being added.
    :param Address dependency: The dependency to be injected.
    """
    if dependent not in self._target_by_address:
      raise ValueError('Cannot inject dependency from {dependent} on {dependency} because the'
                       ' dependent is not in the BuildGraph.'
                       .format(dependent=dependent, dependency=dependency))

    # TODO(pl): Unfortunately this is an unhelpful time to error due to a cycle.  Instead, we warn
    # and allow the cycle to appear.  It is the caller's responsibility to call sort_targets on the
    # entire graph to generate a friendlier CycleException that actually prints the cycle.
    # Alternatively, we could call sort_targets after every inject_dependency/inject_target, but
    # that could have nasty performance implications.  Alternative 2 would be to have an internal
    # data structure of the topologically sorted graph which would have acceptable amortized
    # performance for inserting new nodes, and also cycle detection on each insert.

    if dependency not in self._target_by_address:
      logger.warning('Injecting dependency from {dependent} on {dependency}, but the dependency'
                     ' is not in the BuildGraph.  This probably indicates a dependency cycle, but'
                     ' it is not an error until sort_targets is called on a subgraph containing'
                     ' the cycle.'
                     .format(dependent=dependent, dependency=dependency))

    if dependency in self.dependencies_of(dependent):
      logger.debug('{dependent} already depends on {dependency}'
                   .format(dependent=dependent, dependency=dependency))
    else:
      self._target_dependencies_by_address[dependent].add(dependency)
      self._target_dependees_by_address[dependency].add(dependent)

  def targets(self, predicate=None):
    """Returns all the targets in the graph in no particular order.

    :param predicate: A target predicate that will be used to filter the targets returned.
    """
    return filter(predicate, self._target_by_address.values())

  def sorted_targets(self):
    """:return: targets ordered from most dependent to least."""
    return sort_targets(self._target_by_address.values())

  def walk_transitive_dependency_graph(self, addresses, work, predicate=None, postorder=False):
    """Given a work function, walks the transitive dependency closure of `addresses`.

    :param list<Address> addresses: The closure of `addresses` will be walked.
    :param function work: The function that will be called on every target in the closure using
      the specified traversal order.
    :param bool postorder: When ``True``, the traversal order is postorder (children before
      parents), else it is preorder (parents before children).
    :param function predicate: If this parameter is not given, no Targets will be filtered
      out of the closure.  If it is given, any Target which fails the predicate will not be
      walked, nor will its dependencies.  Thus predicate effectively trims out any subgraph
      that would only be reachable through Targets that fail the predicate.
    """
    walked = set()
    def _walk_rec(address):
      if address not in walked:
        walked.add(address)
        target = self._target_by_address[address]
        if not predicate or predicate(target):
          if not postorder:
            work(target)
          for dep_address in self._target_dependencies_by_address[address]:
            _walk_rec(dep_address)
          if postorder:
            work(target)
    for address in addresses:
      _walk_rec(address)

  def walk_transitive_dependee_graph(self, addresses, work, predicate=None, postorder=False):
    """Identical to `walk_transitive_dependency_graph`, but walks dependees preorder (or postorder
    if the postorder parameter is True).

    This is identical to reversing the direction of every arrow in the DAG, then calling
    `walk_transitive_dependency_graph`.
    """
    walked = set()
    def _walk_rec(address):
      if address not in walked:
        walked.add(address)
        target = self._target_by_address[address]
        if not predicate or predicate(target):
          if not postorder:
            work(target)
          for dep_address in self._target_dependees_by_address[address]:
            _walk_rec(dep_address)
          if postorder:
            work(target)
    for address in addresses:
      _walk_rec(address)

  def transitive_dependees_of_addresses(self, addresses, predicate=None, postorder=False):
    """Returns all transitive dependees of `address`.

    Note that this uses `walk_transitive_dependee_graph` and the predicate is passed through,
    hence it trims graphs rather than just filtering out Targets that do not match the predicate.
    See `walk_transitive_dependee_graph` for more detail on `predicate`.

    :param list<Address> addresses: The root addresses to transitively close over.
    :param function predicate: The predicate passed through to `walk_transitive_dependee_graph`.
    """
    ret = OrderedSet()
    self.walk_transitive_dependee_graph(addresses, ret.add, predicate=predicate, postorder=postorder)
    return ret

  def transitive_subgraph_of_addresses(self, addresses, predicate=None, postorder=False):
    """Returns all transitive dependencies of `address`.

    Note that this uses `walk_transitive_dependency_graph` and the predicate is passed through,
    hence it trims graphs rather than just filtering out Targets that do not match the predicate.
    See `walk_transitive_dependency_graph` for more detail on `predicate`.

    :param list<Address> addresses: The root addresses to transitively close over.
    :param function predicate: The predicate passed through to
      `walk_transitive_dependency_graph`.
    """
    ret = OrderedSet()
    self.walk_transitive_dependency_graph(addresses, ret.add,
                                          predicate=predicate,
                                          postorder=postorder)
    return ret

  def inject_synthetic_target(self,
                              address,
                              target_type,
                              dependencies=None,
                              derived_from=None,
                              **kwargs):
    """Constructs and injects Target at `address` with optional `dependencies` and `derived_from`.

    This method is useful especially for codegen, where a "derived" Target is injected
    programmatically rather than read in from a BUILD file.

    :param Address address: The address of the new Target.  Must not already be in the BuildGraph.
    :param type target_type: The class of the Target to be constructed.
    :param list<Address> dependencies: The dependencies of this Target, usually inferred or copied
      from the `derived_from`.
    :param Target derived_from: The Target this Target will derive from.
    """
    if self.contains_address(address):
      raise ValueError('Attempted to inject synthetic {target_type} derived from {derived_from}'
                       ' into the BuildGraph with address {address}, but there is already a Target'
                       ' {existing_target} with that address'
                       .format(target_type=target_type,
                               derived_from=derived_from,
                               address=address,
                               existing_target=self.get_target(address)))

    target = target_type(name=address.target_name,
                         address=address,
                         build_graph=self,
                         **kwargs)
    self.inject_target(target, dependencies=dependencies, derived_from=derived_from)

  def inject_address(self, address):
    """Delegates to an internal AddressMapper to resolve, construct, and inject a Target.

    :param Address address: The address to inject.  Must be resolvable by `self._address_mapper`.
    """
    if not self.contains_address(address):
      target_addressable = self._address_mapper.resolve(address)
      target = self.target_addressable_to_target(address, target_addressable)
      self.inject_target(target)

  def inject_address_closure(self, address):
    """Recursively calls `inject_address` through the transitive closure of dependencies."""

    if address in self._addresses_already_closed:
      return

    mapper = self._address_mapper

    target_addressable = mapper.resolve(address)

    self._addresses_already_closed.add(address)
    try:
      dep_addresses = list(mapper.specs_to_addresses(target_addressable.dependency_specs,
                                                      relative_to=address.spec_path))
      deps_seen = set()
      for dep_address in dep_addresses:
        if dep_address in deps_seen:
          raise self.DuplicateAddressError(
            'Addresses in dependencies must be unique. \'{spec}\' is referenced more than once.'
            .format(spec=dep_address.spec))
        deps_seen.add(dep_address)
        self.inject_address_closure(dep_address)

      if not self.contains_address(address):
        target = self.target_addressable_to_target(address, target_addressable)
        self.inject_target(target, dependencies=dep_addresses)
      else:
        for dep_address in dep_addresses:
          if dep_address not in self.dependencies_of(address):
            self.inject_dependency(address, dep_address)
        target = self.get_target(address)

      for traversable_spec in target.traversable_dependency_specs:
        self.inject_spec_closure(spec=traversable_spec, relative_to=address.spec_path)
        traversable_spec_target = self.get_target_from_spec(traversable_spec,
                                                            relative_to=address.spec_path)
        if traversable_spec_target not in target.dependencies:
          self.inject_dependency(dependent=target.address,
                                 dependency=traversable_spec_target.address)
          target.mark_transitive_invalidation_hash_dirty()

      for traversable_spec in target.traversable_specs:
        self.inject_spec_closure(spec=traversable_spec, relative_to=address.spec_path)
        target.mark_transitive_invalidation_hash_dirty()

    except AddressLookupError as e:
      raise self.TransitiveLookupError("{message}\n  referenced from {spec}"
                                       .format(message=e, spec=address.spec))

  def inject_spec_closure(self, spec, relative_to=''):
    """Constructs a SyntheticAddress from `spec` and calls `inject_address_closure`.

    :param string spec: A Target spec
    :param string relative_to: The spec_path of the BUILD file this spec was read from.
    """
    address = self._address_mapper.spec_to_address(spec, relative_to=relative_to)
    self.inject_address_closure(address)

  def target_addressable_to_target(self, address, addressable):
    """Realizes a TargetAddressable into a Target at `address`.

    :param TargetAddressable addressable:
    :param Address address:
    """
    try:
      target = addressable.get_target_type()(build_graph=self,
                                             address=address,
                                             **addressable.kwargs)
      target.with_description(addressable.description)
      return target
    except Exception:
      traceback.print_exc()
      logger.exception('Failed to instantiate Target with type {target_type} with name "{name}"'
                       ' at address {address}'
                       .format(target_type=addressable.get_target_type(),
                               name=addressable.name,
                               address=address))
      raise
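
The walk helpers above are plain depth-first traversals keyed by address, with the predicate trimming whole subgraphs rather than filtering individual nodes. Below is a minimal, standalone sketch of that preorder/postorder walk and the trimming behaviour; the `deps` adjacency dict, the `visit` callback, and the string node names are illustrative stand-ins, not the pants BuildGraph API.

def walk(roots, deps, visit, predicate=None, postorder=False):
    """Depth-first walk over a dict-of-lists graph, mirroring the pattern above."""
    walked = set()

    def _walk_rec(node):
        if node in walked:
            return
        walked.add(node)
        if predicate and not predicate(node):
            return  # trims this node and anything only reachable through it
        if not postorder:
            visit(node)              # parents before children
        for dep in deps.get(node, ()):
            _walk_rec(dep)
        if postorder:
            visit(node)              # children before parents

    for root in roots:
        _walk_rec(root)

deps = {'a': ['b', 'c'], 'b': ['d'], 'c': [], 'd': []}
seen = []
walk(['a'], deps, seen.append, predicate=lambda n: n != 'b')
print(seen)  # ['a', 'c'] -- 'd' is trimmed because it is only reachable via 'b'
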
class TestBuildLocalDistsNativeSources(BuildLocalPythonDistributionsTestBase):

    _extra_relevant_task_types = []

    _dist_specs = OrderedDict([
        ('src/python/dist:universal_dist', {
            'key': 'universal',
            'target_type': PythonDistribution,
            'sources': ['foo.py', 'bar.py', '__init__.py', 'setup.py'],
            'filemap': {
                'src/python/dist/__init__.py':
                '',
                'src/python/dist/foo.py':
                'print("foo")',
                'src/python/dist/bar.py':
                'print("bar")',
                'src/python/dist/setup.py':
                dedent("""
        from setuptools import setup, find_packages
        setup(
          name='universal_dist',
          version='0.0.0',
          packages=find_packages()
        )
      """)
            }
        }),
        ('src/python/plat_specific_dist:plat_specific_dist', {
            'key': 'platform_specific',
            'target_type': PythonDistribution,
            'sources': ['__init__.py', 'setup.py', 'native_source.c'],
            'filemap': {
                'src/python/plat_specific_dist/__init__.py':
                '',
                'src/python/plat_specific_dist/setup.py':
                dedent("""
        from distutils.core import Extension
        from setuptools import setup, find_packages
        setup(
          name='platform_specific_dist',
          version='0.0.0',
          packages=find_packages(),
          ext_modules=[Extension('native_source', sources=['native_source.c'])]
        )
      """),
                'src/python/plat_specific_dist/native_source.c':
                dedent("""
        #include <Python.h>

        static PyObject * native_source(PyObject *self, PyObject *args) {
          return Py_BuildValue("s", "Hello from C!");
        }

        static PyMethodDef Methods[] = {
          {"native_source", native_source, METH_VARARGS, ""},
          {NULL, NULL, 0, NULL}
        };

        PyMODINIT_FUNC initnative_source(void) {
          (void) Py_InitModule("native_source", Methods);
        }
      """),
            }
        }),
    ])

    def test_python_create_universal_distribution(self):
        universal_dist = self.target_dict['universal']
        context, synthetic_target, snapshot_version = self._create_distribution_synthetic_target(
            universal_dist)
        self.assertEqual(
            ['universal_dist==0.0.0+{}'.format(snapshot_version)],
            [str(x.requirement) for x in synthetic_target.requirements.value])

        local_wheel_products = context.products.get('local_wheels')
        local_wheel = self._retrieve_single_product_at_target_base(
            local_wheel_products, universal_dist)
        _, _, wheel_platform = name_and_platform(local_wheel)
        self.assertEqual('any', wheel_platform)

    def test_python_create_platform_specific_distribution(self):
        platform_specific_dist = self.target_dict['platform_specific']
        context, synthetic_target, snapshot_version = self._create_distribution_synthetic_target(
            platform_specific_dist)
        self.assertEqual(
            ['platform_specific_dist==0.0.0+{}'.format(snapshot_version)],
            [str(x.requirement) for x in synthetic_target.requirements.value])

        local_wheel_products = context.products.get('local_wheels')
        local_wheel = self._retrieve_single_product_at_target_base(
            local_wheel_products, platform_specific_dist)
        self.assertTrue(check_wheel_platform_matches_host(local_wheel))
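
The universal test reads the platform tag out of the built wheel's filename via name_and_platform, and the platform-specific test delegates to check_wheel_platform_matches_host; neither helper is shown in this example. The sketch below is only a plausible reading of what name_and_platform returns, based on the standard PEP 427 wheel naming scheme ({dist}-{version}(-{build})?-{python}-{abi}-{platform}.whl); the real helper may differ.

import os

def name_and_platform(wheel_path):
    # Hypothetical re-implementation based on the PEP 427 filename layout;
    # the helper actually used by the test may behave differently.
    fname = os.path.basename(wheel_path)
    stem, ext = os.path.splitext(fname)
    assert ext == '.whl', 'expected a wheel file, got {}'.format(fname)
    parts = stem.split('-')
    # The last dash-separated field is the platform tag (e.g. 'any' for universal wheels).
    name, version, platform = parts[0], parts[1], parts[-1]
    return name, version, platform

print(name_and_platform('universal_dist-0.0.0-py3-none-any.whl'))
# ('universal_dist', '0.0.0', 'any')
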
Example #45
0
class TestBuildLocalDistsWithCtypesNativeSources(
        BuildLocalPythonDistributionsTestBase):

    _extra_relevant_task_types = ([
        CCompile,
        CppCompile,
        LinkSharedLibraries,
    ] + BuildLocalPythonDistributionsTestBase._extra_relevant_task_types)

    _dist_specs = OrderedDict([
        ('src/python/plat_specific_c_dist:ctypes_c_library', {
            'key': 'ctypes_c_library',
            'target_type': CLibrary,
            'ctypes_native_library': NativeArtifact(lib_name='c-math-lib'),
            'sources': ['c_math_lib.c', 'c_math_lib.h'],
            'filemap': {
                'src/python/plat_specific_c_dist/c_math_lib.c':
                dedent("""
        #include "c_math_lib.h"
        int add_two(int x) { return x + 2; }
"""),
                'src/python/plat_specific_c_dist/c_math_lib.h':
                dedent("""
        int add_two(int);
"""),
            }
        }),
        ('src/python/plat_specific_c_dist:plat_specific_ctypes_c_dist', {
            'key': 'platform_specific_ctypes_c_dist',
            'target_type': PythonDistribution,
            'sources': ['__init__.py', 'setup.py'],
            'dependencies':
            ['src/python/plat_specific_c_dist:ctypes_c_library'],
            'filemap': {
                'src/python/plat_specific_c_dist/__init__.py':
                '',
                'src/python/plat_specific_c_dist/setup.py':
                dedent("""
        from setuptools import setup, find_packages
        setup(
          name='platform_specific_ctypes_c_dist',
          version='0.0.0',
          packages=find_packages(),
          data_files=[('', ['libc-math-lib.so'])],
        )
      """),
            }
        }),
        ('src/python/plat_specific_cpp_dist:ctypes_cpp_library', {
            'key': 'ctypes_cpp_library',
            'target_type': CppLibrary,
            'ctypes_native_library': NativeArtifact(lib_name='cpp-math-lib'),
            'sources': ['cpp_math_lib.cpp', 'cpp_math_lib.hpp'],
            'filemap': {
                'src/python/plat_specific_cpp_dist/cpp_math_lib.cpp': '',
                'src/python/plat_specific_cpp_dist/cpp_math_lib.hpp': '',
            },
        }),
        ('src/python/plat_specific_cpp_dist:plat_specific_ctypes_cpp_dist', {
            'key':
            'platform_specific_ctypes_cpp_dist',
            'target_type':
            PythonDistribution,
            'sources': ['__init__.py', 'setup.py'],
            'dependencies':
            ['src/python/plat_specific_cpp_dist:ctypes_cpp_library'],
            'filemap': {
                'src/python/plat_specific_cpp_dist/__init__.py':
                '',
                'src/python/plat_specific_cpp_dist/setup.py':
                dedent("""
        from setuptools import setup, find_packages
        setup(
          name='platform_specific_ctypes_cpp_dist',
          version='0.0.0',
          packages=find_packages(),
          data_files=[('', ['libcpp-math-lib.so'])],
        )
      """),
            }
        }),
    ])

    def test_ctypes_c_dist(self):
        platform_specific_dist = self.target_dict[
            'platform_specific_ctypes_c_dist']
        context, synthetic_target, snapshot_version = self._create_distribution_synthetic_target(
            platform_specific_dist,
            extra_targets=[self.target_dict['ctypes_c_library']])
        self.assertEqual([
            'platform_specific_ctypes_c_dist==0.0.0+{}'.format(
                snapshot_version)
        ], [str(x.requirement) for x in synthetic_target.requirements.value])
        local_wheel_products = context.products.get('local_wheels')
        local_wheel = self._retrieve_single_product_at_target_base(
            local_wheel_products, platform_specific_dist)
        self.assertTrue(check_wheel_platform_matches_host(local_wheel))

    def test_ctypes_cpp_dist(self):
        platform_specific_dist = self.target_dict[
            'platform_specific_ctypes_cpp_dist']
        context, synthetic_target, snapshot_version = self._create_distribution_synthetic_target(
            platform_specific_dist,
            extra_targets=[self.target_dict['ctypes_cpp_library']])
        self.assertEqual([
            'platform_specific_ctypes_cpp_dist==0.0.0+{}'.format(
                snapshot_version)
        ], [str(x.requirement) for x in synthetic_target.requirements.value])

        local_wheel_products = context.products.get('local_wheels')
        local_wheel = self._retrieve_single_product_at_target_base(
            local_wheel_products, platform_specific_dist)
        self.assertTrue(check_wheel_platform_matches_host(local_wheel))
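
The ctypes targets above declare NativeArtifact lib names (c-math-lib, cpp-math-lib), and the setup.py files package the resulting libc-math-lib.so / libcpp-math-lib.so via data_files. The tests only check the wheel's platform tag; as a hedged aside, loading such a shared object by hand would look roughly like the sketch below. The add_two signature comes from the C source above, but the .so path is an assumption for illustration.

import ctypes

# Illustrative only: load the shared library the dist is expected to ship
# and call the exported C function declared in c_math_lib.h.
lib = ctypes.CDLL('./libc-math-lib.so')   # path is an assumption
lib.add_two.argtypes = [ctypes.c_int]
lib.add_two.restype = ctypes.c_int
print(lib.add_two(40))  # -> 42
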
class PerPathDatapoints(Thread):
  PURGE_SLEEP_TIME = 2  # sleep time between purging old datapoints
  DEFAULT_TOP_RESULTS = 10  # number of (top) results to show by default

  def __init__(self, older_than=120, aggregation_depth=0):
    """
    Datapoints older than `older_than` seconds are dropped.
    If `aggregation_depth` > 0, paths are aggregated up to that depth.
    """
    self._older_than = older_than
    self._aggregation_depth = aggregation_depth
    self._requests_by_timestamp = OrderedDict()
    self._lock = Lock()

    super(PerPathDatapoints, self).__init__()

  def size(self):
    size = {"samples": 0, "requests_mem_usage": 0}
    with self._lock:
      samples, mem_usage = 0, 0
      for reqs in self._requests_by_timestamp.values():
        samples += len(reqs)
        mem_usage += sum(sys.getsizeof(r) for r in reqs)

    size["samples"] = samples
    size["requests_mem_usage"] = mem_usage
    size["requests_mem_usage"] = sizeof_fmt(size["requests_mem_usage"])
    size["ordered_dict_mem_usage"] = sizeof_fmt(sys.getsizeof(self._requests_by_timestamp))

    return size

  def run(self):
    """ drop samples that are too old """
    while True:
      time.sleep(self.PURGE_SLEEP_TIME)
      old_tstamp = time.time() - self._older_than
      with self._lock:
        for tstamp in self._requests_by_timestamp.keys():
          if tstamp < old_tstamp:
            del self._requests_by_timestamp[tstamp]

  def handle_request(self, request):
    if self._aggregation_depth > 0:
      request.path = intern(request.parent_path(self._aggregation_depth))

    with self._lock:
      tstamp = int(time.time())
      if tstamp not in self._requests_by_timestamp:
        self._requests_by_timestamp[tstamp] = []
      self._requests_by_timestamp[tstamp].append(request)

  def sum_minute(self, top=DEFAULT_TOP_RESULTS, order_by=Counters.WRITES,
                 display=[Counters.ALL], view=AccumulatedStats.VIEW_BY_PATH):
    now = int(time.time())
    old = now - NUMBER_OF_DATAPOINTS
    stats = AccumulatedStats(StatsConfig())

    with self._lock:
      # note that this is an OrderedDict so samples are in chronological order
      for tstamp in self._requests_by_timestamp.keys():
        if tstamp < old:
          continue

        if tstamp > now:
          break

        for r in self._requests_by_timestamp[tstamp]:
          stats.handle_request(r)

    return stats.dict(top=top,
                      order_by=order_by,
                      display_filters=display,
                      view=view)

  def datapoints_writes(self):
    return self._filter_datapoints(condition=lambda req: req.is_write)

  def datapoints_reads(self):
    return self._filter_datapoints(condition=lambda req: not req.is_write)

  def datapoints_for_op(self, op):
    return self._filter_datapoints(condition=lambda req: req.opcode == op)

  def datapoints_by_path_for_op(self, op, top):
    """ op is "writes" or "reads" or one of OpCodes.CREATE, OpCodes.SETDATA, etc.
        because why use Python if you can't abuse types?
        top is the number of results
    """
    if op == "writes":
      return self._datapoints_by_path_for_op_impl(lambda r: r.is_write, top)
    elif op == "reads":
      return self._datapoints_by_path_for_op_impl(lambda r: not r.is_write, top)
    else:
      return self._datapoints_by_path_for_op_impl(lambda r: r.opcode == op, top)

  def _datapoints_by_path_for_op_impl(self, request_filter, top):
    """ to make this moderately efficient we use a dict that
    provides a pre-populated list of datapoints.
    """
    tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
    datapoints = PathDatapoints()
    with self._lock:
      for i in range(0, NUMBER_OF_DATAPOINTS):
        if tstamp in self._requests_by_timestamp:
          for req in self._requests_by_timestamp[tstamp]:
            if request_filter(req):
              dp = datapoints[req.path][i][1] + 1
              datapoints[req.path][i] = (i, dp)
        tstamp += 1

    # sort
    def comparator(path_a, path_b):
      sum_a = sum(d[1] for d in datapoints[path_a])
      sum_b = sum(d[1] for d in datapoints[path_b])
      return sum_b - sum_a
    paths = sorted(datapoints.keys(), comparator)

    if len(paths) == 0:
      return [("/", datapoints["/"])]

    return [(p, datapoints[p]) for p in paths[0:top]]

  def _filter_datapoints(self, condition):
    tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
    datapoints = []
    for i in range(0, NUMBER_OF_DATAPOINTS):
      aggregate = sum(bool(condition(req)) for req in self._requests_by_timestamp.get(tstamp, []))
      datapoints.append((i, aggregate))
      tstamp += 1

    return datapoints
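
A few details in PerPathDatapoints are Python 2 specific: handle_request uses the bare intern builtin, run() deletes from the OrderedDict while iterating .keys() (safe only because Python 2's keys() returns a list), and _datapoints_by_path_for_op_impl passes a cmp-style comparator positionally to sorted(). On Python 3 the equivalent ordering, descending by total count, would look like the sketch below; the toy `datapoints` mapping stands in for the path -> [(index, count), ...] structure assumed above.

from functools import cmp_to_key

# Toy stand-in for the path -> [(index, count), ...] mapping used above.
datapoints = {
    '/a': [(0, 3), (1, 1)],
    '/b': [(0, 10), (1, 2)],
}

def comparator(path_a, path_b):
    sum_a = sum(d[1] for d in datapoints[path_a])
    sum_b = sum(d[1] for d in datapoints[path_b])
    return sum_b - sum_a

# Python 3: either wrap the old cmp function...
paths = sorted(datapoints.keys(), key=cmp_to_key(comparator))
# ...or express the intent directly with a key function.
paths = sorted(datapoints, key=lambda p: sum(d[1] for d in datapoints[p]), reverse=True)
print(paths)  # ['/b', '/a']
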
Example #47
0
    def attempt(self, timer, explain):
      """Executes the named phase against the current context tracking goal executions in executed.
      """

      def execute_task(goal, task, targets):
        """Execute and time a single goal that has had all of its dependencies satisfied."""
        with timer.timed(goal):
          # TODO (Senthil Kumaran):
          # Possible refactoring of the Task Execution Logic (AWESOME-1019)
          if explain:
            self._context.log.debug("Skipping execution of %s in explain mode" % goal.name)
          else:
            task.execute(targets)

      goals = self._phase.goals()
      if not goals:
        raise TaskError('No goals installed for phase %s' % self._phase)

      run_queue = []
      goals_by_group = {}
      for goal in goals:
        if goal.group:
          group_name = goal.group.name
          if group_name not in goals_by_group:
            group_goals = [goal]
            run_queue.append((group_name, group_goals))
            goals_by_group[group_name] = group_goals
          else:
            goals_by_group[group_name].append(goal)
        else:
          run_queue.append((None, [goal]))

      with self._context.new_workunit(name=self._phase.name, labels=[WorkUnit.PHASE]):
        # OrderedSet takes care of not repeating chunked task execution mentions
        execution_phases = defaultdict(OrderedSet)

        for group_name, goals in run_queue:
          if not group_name:
            goal = goals[0]
            execution_phases[self._phase].add(goal.name)
            with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
              execute_task(goal, self._tasks_by_goal[goal], self._context.targets())
          else:
            with self._context.new_workunit(name=group_name, labels=[WorkUnit.GROUP]):
              goals_by_group_member = OrderedDict((GroupMember.from_goal(g), g) for g in goals)

              # First, divide the set of all targets to be built into compatible chunks, based
              # on their declared exclusives. Then, for each chunk of compatible exclusives, do
              # further sub-chunking. At the end, we'll have a list of chunks to be built,
              # which will go through the chunks of each exclusives-compatible group separately.

              # TODO(markcc): chunks with incompatible exclusives require separate ivy resolves.
              # Either interleave the ivy task in this group so that it runs once for each batch of
              # chunks with compatible exclusives, or make the compilation tasks do their own ivy
              # resolves for each batch of targets they're asked to compile.

              goal_chunks = []

              # We won't have exclusives calculated if stopping short for example during an explain.
              if explain:
                exclusive_chunks = [self._context.targets()]
              else:
                exclusive_chunks = ExclusivesIterator.from_context(self._context)

              for exclusive_chunk in exclusive_chunks:
                # TODO(Travis Crawford): Targets should be filtered by is_concrete rather than
                # is_internal, however, at this time python targets are not internal targets.
                group_chunks = GroupIterator(filter(lambda t: t.is_internal, exclusive_chunk),
                                             goals_by_group_member.keys())
                goal_chunks.extend(group_chunks)

              self._context.log.debug('::: created chunks(%d)' % len(goal_chunks))
              for i, (group_member, goal_chunk) in enumerate(goal_chunks):
                self._context.log.debug('  chunk(%d) [flavor=%s]:\n\t%s' % (
                    i, group_member.name, '\n\t'.join(sorted(map(str, goal_chunk)))))

              for group_member, goal_chunk in goal_chunks:
                goal = goals_by_group_member[group_member]
                execution_phases[self._phase].add((group_name, goal.name))
                with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
                  execute_task(goal, self._tasks_by_goal[goal], goal_chunk)

        if explain:
          tasks_by_goalname = dict((goal.name, task.__class__.__name__)
                                   for goal, task in self._tasks_by_goal.items())

          def expand_goal(goal):
            if len(goal) == 2:  # goal is (group, goal)
              group_name, goal_name = goal
              task_name = tasks_by_goalname[goal_name]
              return "%s:%s->%s" % (group_name, goal_name, task_name)
            else:
              task_name = tasks_by_goalname[goal]
              return "%s->%s" % (goal, task_name)

          for phase, goals in execution_phases.items():
            goal_to_task = ", ".join(expand_goal(goal) for goal in goals)
            print("%s [%s]" % (phase, goal_to_task))