Example 1
  def compile(self, classpath, sources, output_dir, analysis_cache, upstream_analysis_caches, depfile):
    safe_mkdir(output_dir)
    compiler_classpath = nailgun_profile_classpath(self, self._compile_profile)
    compiler_args = []

    # TODO(John Sirois): separate compiler profile from runtime profile
    compiler_args.extend([
      # Support for outputting a dependencies file of source -> class
      '-Xplugin:%s' % self.get_depemitter_plugin(),
      '-P:depemitter:file:%s' % depfile
    ])
    compiler_args.extend(self._args)

    # To pass options through to scalac, simply prefix them with -S.
    args = ['-S' + x for x in compiler_args]

    def analysis_cache_full_path(analysis_cache_product):
      # We expect the argument to be { analysis_cache_dir: [ analysis_cache_file ] }.
      if len(analysis_cache_product) != 1:
        raise TaskError('Expected exactly one analysis cache directory in the upstream product')
      analysis_cache_dir, analysis_cache_files = analysis_cache_product.iteritems().next()
      if len(analysis_cache_files) != 1:
        raise TaskError('There can only be one analysis cache file per output directory')
      return os.path.join(analysis_cache_dir, analysis_cache_files[0])

    # Strings of <output dir>:<full path to analysis cache file for the classes in that dir>.
    analysis_map = \
      OrderedDict([ (k, analysis_cache_full_path(v)) for k, v in upstream_analysis_caches.itermappings() ])

    if len(analysis_map) > 0:
      args.extend([ '-analysis-map', ','.join(['%s:%s' % kv for kv in analysis_map.items()]) ])
    upstream_classes_dirs = analysis_map.keys()

    zinc_classpath = nailgun_profile_classpath(self, self._zinc_profile)
    zinc_jars = ScalaCompile.identify_zinc_jars(compiler_classpath, zinc_classpath)
    for (name, jarpath) in zinc_jars.items():  # The zinc jar names are also the flag names.
      args.extend(['-%s' % name, jarpath])

    args.extend([
      '-analysis-cache', analysis_cache,
      '-log-level', self.context.options.log_level or 'info',
      '-classpath', ':'.join(zinc_classpath + classpath + upstream_classes_dirs),
      '-d', output_dir
    ])

    if not self._color:
      args.append('-no-color')

    args.extend(sources)

    self.context.log.debug('Executing: %s %s' % (self._main, ' '.join(args)))
    return self.runjava(self._main, classpath=zinc_classpath, args=args, jvmargs=self._jvm_args)
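
A minimal, self-contained sketch of the argument shaping done above: scalac options are forwarded through zinc by prefixing them with -S, and the upstream analysis map is flattened into a single comma-separated -analysis-map value. The paths and options below are hypothetical placeholders, not values taken from the real task.

from collections import OrderedDict

# Hypothetical inputs, for illustration only.
compiler_args = ['-deprecation', '-unchecked']
analysis_map = OrderedDict([
  ('/tmp/out/libA', '/tmp/cache/libA.analysis'),
  ('/tmp/out/libB', '/tmp/cache/libB.analysis'),
])

# Options destined for scalac are passed through zinc with a -S prefix.
args = ['-S' + arg for arg in compiler_args]

# <output dir>:<analysis cache file> pairs, joined into one flag value.
if analysis_map:
  args.extend(['-analysis-map', ','.join('%s:%s' % kv for kv in analysis_map.items())])

print(args)
# ['-S-deprecation', '-S-unchecked', '-analysis-map',
#  '/tmp/out/libA:/tmp/cache/libA.analysis,/tmp/out/libB:/tmp/cache/libB.analysis']
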
Example 2
    def attempt(self, timer, explain):
      """Executes the named phase against the current context tracking goal executions in executed.
      """

      def execute_task(goal, task, targets):
        """Execute and time a single goal that has had all of its dependencies satisfied."""
        with timer.timed(goal):
          # TODO (Senthil Kumaran):
          # Possible refactoring of the Task Execution Logic (AWESOME-1019)
          if explain:
            self._context.log.debug("Skipping execution of %s in explain mode" % goal.name)
          else:
            task.execute(targets)

      goals = self._phase.goals()
      if not goals:
        raise TaskError('No goals installed for phase %s' % self._phase)

      run_queue = []
      goals_by_group = {}
      for goal in goals:
        if goal.group:
          group_name = goal.group.name
          if group_name not in goals_by_group:
            group_goals = [goal]
            run_queue.append((group_name, group_goals))
            goals_by_group[group_name] = group_goals
          else:
            goals_by_group[group_name].append(goal)
        else:
          run_queue.append((None, [goal]))

      with self._context.new_workunit(name=self._phase.name, labels=[WorkUnit.PHASE]):
        # An OrderedSet ensures each chunked task execution is only mentioned once in the explain output.
        execution_phases = defaultdict(OrderedSet)

        for group_name, goals in run_queue:
          if not group_name:
            goal = goals[0]
            execution_phases[self._phase].add(goal.name)
            with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
              execute_task(goal, self._tasks_by_goal[goal], self._context.targets())
          else:
            with self._context.new_workunit(name=group_name, labels=[WorkUnit.GROUP]):
              goals_by_group_member = OrderedDict((GroupMember.from_goal(g), g) for g in goals)

              # First, divide the set of all targets to be built into compatible chunks, based
              # on their declared exclusives. Then, for each chunk of compatible exclusives, do
              # further sub-chunking. At the end, we'll have a list of chunks to be built,
              # which will go through the chunks of each exclusives-compatible group separately.

              # TODO(markcc): chunks with incompatible exclusives require separate ivy resolves.
              # Either interleave the ivy task in this group so that it runs once for each batch of
              # chunks with compatible exclusives, or make the compilation tasks do their own ivy
              # resolves for each batch of targets they're asked to compile.

              goal_chunks = []

              # We won't have exclusives calculated if we're stopping short, for example during an explain.
              if explain:
                exclusive_chunks = [self._context.targets()]
              else:
                exclusive_chunks = ExclusivesIterator.from_context(self._context)

              for exclusive_chunk in exclusive_chunks:
                # TODO(Travis Crawford): Targets should be filtered by is_concrete rather than
                # is_internal; however, at this time python targets are not internal targets.
                group_chunks = GroupIterator(filter(lambda t: t.is_internal, exclusive_chunk),
                                             goals_by_group_member.keys())
                goal_chunks.extend(group_chunks)

              self._context.log.debug('::: created chunks(%d)' % len(goal_chunks))
              for i, (group_member, goal_chunk) in enumerate(goal_chunks):
                self._context.log.debug('  chunk(%d) [flavor=%s]:\n\t%s' % (
                    i, group_member.name, '\n\t'.join(sorted(map(str, goal_chunk)))))

              for group_member, goal_chunk in goal_chunks:
                goal = goals_by_group_member[group_member]
                execution_phases[self._phase].add((group_name, goal.name))
                with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
                  execute_task(goal, self._tasks_by_goal[goal], goal_chunk)

        if explain:
          tasks_by_goalname = dict((goal.name, task.__class__.__name__)
                                   for goal, task in self._tasks_by_goal.items())

          def expand_goal(goal):
            if isinstance(goal, tuple):  # goal is a (group_name, goal_name) pair
              group_name, goal_name = goal
              task_name = tasks_by_goalname[goal_name]
              return "%s:%s->%s" % (group_name, goal_name, task_name)
            else:
              task_name = tasks_by_goalname[goal]
              return "%s->%s" % (goal, task_name)

          for phase, goals in execution_phases.items():
            goal_to_task = ", ".join(expand_goal(goal) for goal in goals)
            print("%s [%s]" % (phase, goal_to_task))
Example 3
class PerPathDatapoints(Thread):
    PURGE_SLEEP_TIME = 2  # sleep time between purging old datapoints
    DEFAULT_TOP_RESULTS = 10  # number of (top) results to show by default

    def __init__(self, older_than=120, aggregation_depth=0):
        """
    datapoints that are `older_than` will be dropped
    if aggregation_depth > 0 then we aggregate for paths up to that depth
    """
        self._older_than = older_than
        self._aggregation_depth = aggregation_depth
        self._requests_by_timestamp = OrderedDict()
        self._lock = Lock()

        super(PerPathDatapoints, self).__init__()

    def size(self):
        size = {"samples": 0, "requests_mem_usage": 0}
        with self._lock:
            samples, mem_usage = 0, 0
            for reqs in self._requests_by_timestamp.values():
                samples += len(reqs)
                mem_usage += sum(sys.getsizeof(r) for r in reqs)

        size["samples"] = samples
        size["requests_mem_usage"] = mem_usage
        size["requests_mem_usage"] = sizeof_fmt(size["requests_mem_usage"])
        size["ordered_dict_mem_usage"] = sizeof_fmt(
            sys.getsizeof(self._requests_by_timestamp))

        return size

    def run(self):
        """ drop samples that are too old """
        while True:
            time.sleep(self.PURGE_SLEEP_TIME)
            old_tstamp = time.time() - self._older_than
            with self._lock:
                for tstamp in self._requests_by_timestamp.keys():
                    if tstamp < old_tstamp:
                        del self._requests_by_timestamp[tstamp]

    def handle_request(self, request):
        if self._aggregation_depth > 0:
            request.path = intern(request.parent_path(self._aggregation_depth))

        with self._lock:
            tstamp = int(time.time())
            if tstamp not in self._requests_by_timestamp:
                self._requests_by_timestamp[tstamp] = []
            self._requests_by_timestamp[tstamp].append(request)

    def sum_minute(self,
                   top=DEFAULT_TOP_RESULTS,
                   order_by=Counters.WRITES,
                   display=[Counters.ALL],
                   view=AccumulatedStats.VIEW_BY_PATH):
        now = int(time.time())
        old = now - NUMBER_OF_DATAPOINTS
        stats = AccumulatedStats(StatsConfig())

        with self._lock:
            # note that this is an OrderedDict so samples are in chronological order
            for tstamp in self._requests_by_timestamp.keys():
                if tstamp < old:
                    continue

                if tstamp > now:
                    break

                for r in self._requests_by_timestamp[tstamp]:
                    stats.handle_request(r)

        return stats.dict(top=top,
                          order_by=order_by,
                          display_filters=display,
                          view=view)

    def datapoints_writes(self):
        return self._filter_datapoints(condition=lambda req: req.is_write)

    def datapoints_reads(self):
        return self._filter_datapoints(condition=lambda req: not req.is_write)

    def datapoints_for_op(self, op):
        return self._filter_datapoints(condition=lambda req: req.opcode == op)

    def datapoints_by_path_for_op(self, op, top):
        """ op is "writes" or "reads" or one of OpCodes.CREATE, OpCodes.SETDATA, etc.
        because why use Python if you can't abuse types?
        top is the number of results
    """
        if op == "writes":
            return self._datapoints_by_path_for_op_impl(
                lambda r: r.is_write, top)
        elif op == "reads":
            return self._datapoints_by_path_for_op_impl(
                lambda r: not r.is_write, top)
        else:
            return self._datapoints_by_path_for_op_impl(
                lambda r: r.opcode == op, top)

    def _datapoints_by_path_for_op_impl(self, request_filter, top):
        """ to make this moderately efficient we use a dict that
        provides a pre-populated list of datapoints.
        """
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = PathDatapoints()
        with self._lock:
            for i in range(0, NUMBER_OF_DATAPOINTS):
                if tstamp in self._requests_by_timestamp:
                    for req in self._requests_by_timestamp[tstamp]:
                        if request_filter(req):
                            dp = datapoints[req.path][i][1] + 1
                            datapoints[req.path][i] = (i, dp)
                tstamp += 1

        # sort
        def comparator(path_a, path_b):
            sum_a = sum(d[1] for d in datapoints[path_a])
            sum_b = sum(d[1] for d in datapoints[path_b])
            return sum_b - sum_a

        paths = sorted(datapoints.keys(), comparator)

        if len(paths) == 0:
            return [("/", datapoints["/"])]

        return [(p, datapoints[p]) for p in paths[0:top]]

    def _filter_datapoints(self, condition):
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = []
        for i in range(0, NUMBER_OF_DATAPOINTS):
            aggregate = sum(
                bool(condition(req))
                for req in self._requests_by_timestamp.get(tstamp, []))
            datapoints.append((i, aggregate))
            tstamp += 1

        return datapoints
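
All of the datapoint methods above read from an OrderedDict keyed by whole-second timestamps that handle_request fills in. A minimal sketch of that bucketing and of the _filter_datapoints-style counting, with an assumed 60-second window and a stand-in Request type (neither is taken from the original module):

import time
from collections import OrderedDict, namedtuple

NUMBER_OF_DATAPOINTS = 60  # assumed window size: one datapoint per second
Request = namedtuple('Request', ['path', 'is_write'])  # illustrative stand-in

requests_by_timestamp = OrderedDict()

def handle_request(request):
  # bucket each request under the whole second it arrived in
  requests_by_timestamp.setdefault(int(time.time()), []).append(request)

def filter_datapoints(condition):
  # one (index, count) pair per second, oldest bucket first
  tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
  datapoints = []
  for i in range(NUMBER_OF_DATAPOINTS):
    matching = sum(bool(condition(r)) for r in requests_by_timestamp.get(tstamp, []))
    datapoints.append((i, matching))
    tstamp += 1
  return datapoints

# Seed a bucket one second in the past: the window above excludes the current, still-filling second.
requests_by_timestamp[int(time.time()) - 1] = [Request('/cfg', True), Request('/cfg', False)]
print(sum(count for _, count in filter_datapoints(lambda r: r.is_write)))  # 1 write in the window
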
Example 4
class PerPathDatapoints(Thread):
  PURGE_SLEEP_TIME = 2  # sleep time between purging old datapoints
  DEFAULT_TOP_RESULTS = 10  # number of (top) results to show by default

  def __init__(self, older_than=120, aggregation_depth=0):
    """
    datapoints that are `older_than` will be dropped
    if aggregation_depth > 0 then we aggregate for paths up to that depth
    """
    self._older_than = older_than
    self._aggregation_depth = aggregation_depth
    self._requests_by_timestamp = OrderedDict()
    self._lock = Lock()

    super(PerPathDatapoints, self).__init__()

  def size(self):
    size = {"samples": 0, "requests_mem_usage": 0}
    with self._lock:
      samples, mem_usage = 0, 0
      for reqs in self._requests_by_timestamp.values():
        samples += len(reqs)
        mem_usage += sum(sys.getsizeof(r) for r in reqs)

    size["samples"] = samples
    size["requests_mem_usage"] = mem_usage
    size["requests_mem_usage"] = sizeof_fmt(size["requests_mem_usage"])
    size["ordered_dict_mem_usage"] = sizeof_fmt(sys.getsizeof(self._requests_by_timestamp))

    return size

  def run(self):
    """ drop samples that are too old """
    while True:
      time.sleep(self.PURGE_SLEEP_TIME)
      old_tstamp = time.time() - self._older_than
      with self._lock:
        for tstamp in self._requests_by_timestamp.keys():
          if tstamp < old_tstamp:
            del self._requests_by_timestamp[tstamp]

  def handle_request(self, request):
    if self._aggregation_depth > 0:
      request.path = intern(request.parent_path(self._aggregation_depth))

    with self._lock:
      tstamp = int(time.time())
      if tstamp not in self._requests_by_timestamp:
        self._requests_by_timestamp[tstamp] = []
      self._requests_by_timestamp[tstamp].append(request)

  def sum_minute(self, top=DEFAULT_TOP_RESULTS, order_by=Counters.WRITES,
                 display=[Counters.ALL], view=AccumulatedStats.VIEW_BY_PATH):
    now = int(time.time())
    old = now - NUMBER_OF_DATAPOINTS
    stats = AccumulatedStats(StatsConfig())

    with self._lock:
      # note that this is an OrderedDict so samples are in chronological order
      for tstamp in self._requests_by_timestamp.keys():
        if tstamp < old:
          continue

        if tstamp > now:
          break

        for r in self._requests_by_timestamp[tstamp]:
          stats.handle_request(r)

    return stats.dict(top=top,
                      order_by=order_by,
                      display_filters=display,
                      view=view)

  def datapoints_writes(self):
    return self._filter_datapoints(condition=lambda req: req.is_write)

  def datapoints_reads(self):
    return self._filter_datapoints(condition=lambda req: not req.is_write)

  def datapoints_for_op(self, op):
    return self._filter_datapoints(condition=lambda req: req.opcode == op)

  def datapoints_by_path_for_op(self, op, top):
    """ op is "writes" or "reads" or one of OpCodes.CREATE, OpCodes.SETDATA, etc.
        because why use Python if you can't abuse types?
        top is the number of results
    """
    if op == "writes":
      return self._datapoints_by_path_for_op_impl(lambda r: r.is_write, top)
    elif op == "reads":
      return self._datapoints_by_path_for_op_impl(lambda r: not r.is_write, top)
    else:
      return self._datapoints_by_path_for_op_impl(lambda r: r.opcode == op, top)

  def _datapoints_by_path_for_op_impl(self, request_filter, top):
    """ to make this moderately efficient we use a dict that
    provides a pre-populated list of datapoints.
    """
    tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
    datapoints = PathDatapoints()
    with self._lock:
      for i in range(0, NUMBER_OF_DATAPOINTS):
        if tstamp in self._requests_by_timestamp:
          for req in self._requests_by_timestamp[tstamp]:
            if request_filter(req):
              dp = datapoints[req.path][i][1] + 1
              datapoints[req.path][i] = (i, dp)
        tstamp += 1

    # sort
    def comparator(path_a, path_b):
      sum_a = sum(d[1] for d in datapoints[path_a])
      sum_b = sum(d[1] for d in datapoints[path_b])
      return sum_b - sum_a
    paths = sorted(datapoints.keys(), comparator)

    if len(paths) == 0:
      return [("/", datapoints["/"])]

    return [(p, datapoints[p]) for p in paths[0:top]]

  def _filter_datapoints(self, condition):
    tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
    datapoints = []
    for i in range(0, NUMBER_OF_DATAPOINTS):
      aggregate = sum(bool(condition(req)) for req in self._requests_by_timestamp.get(tstamp, []))
      datapoints.append((i, aggregate))
      tstamp += 1

    return datapoints
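
This class leans on several Python 2 idioms: the intern() builtin, sorted() with a cmp-style comparator, and deleting entries while iterating over dict.keys() (safe in Python 2, where keys() returns a list). A small sketch of the Python 3 equivalents, should the code be ported; the sample data is made up for illustration:

import sys

# intern() became sys.intern in Python 3.
path = sys.intern('/some/aggregated/path')

# sorted() no longer takes a cmp argument; sort by descending per-path totals with key=.
datapoints = {'/a': [(0, 3), (1, 1)], '/b': [(0, 5), (1, 2)]}
paths = sorted(datapoints, key=lambda p: sum(d[1] for d in datapoints[p]), reverse=True)
print(paths)  # ['/b', '/a']

# dict.keys() is a live view in Python 3; snapshot it before deleting while iterating.
requests_by_timestamp = {100: ['r1'], 200: ['r2'], 300: ['r3']}
old_tstamp = 250
for tstamp in list(requests_by_timestamp):
  if tstamp < old_tstamp:
    del requests_by_timestamp[tstamp]
print(requests_by_timestamp)  # {300: ['r3']}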