def compile(self, classpath, sources, output_dir, analysis_cache, upstream_analysis_caches,
            depfile):
    """Assemble the zinc command line for ``sources`` and run it through ``self.runjava``.

    :param classpath: classpath entries, concatenated after the zinc profile classpath
      (presumably a list, since it is joined with ``+`` -- confirm against callers).
    :param sources: source paths; appended as the trailing zinc arguments.
    :param output_dir: passed to zinc as ``-d``; ``safe_mkdir`` is called on it first.
    :param analysis_cache: passed to zinc as ``-analysis-cache``.
    :param upstream_analysis_caches: object whose ``itermappings()`` yields
      ``(classes_dir, {analysis_cache_dir: [analysis_cache_file]})`` pairs.
    :param depfile: file path handed to the depemitter scalac plugin.
    :returns: the result of ``self.runjava``.
    :raises TaskError: if an upstream product does not hold exactly one directory that maps
      to exactly one analysis cache file.
    """
    safe_mkdir(output_dir)
    compiler_classpath = nailgun_profile_classpath(self, self._compile_profile)

    # TODO(John Sirois): separate compiler profile from runtime profile
    compiler_args = [
        # Support for outputting a dependencies file of source -> class
        '-Xplugin:%s' % self.get_depemitter_plugin(),
        '-P:depemitter:file:%s' % depfile,
    ]
    compiler_args.extend(self._args)

    # To pass options to scalac simply prefix with -S.
    args = ['-S' + x for x in compiler_args]

    def analysis_cache_full_path(analysis_cache_product):
        # We expect the argument to be { analysis_cache_dir, [ analysis_cache_file ]}.
        if len(analysis_cache_product) != 1:
            raise TaskError('There can only be one analysis cache file per output directory')
        # next(iter(...)) works on both Python 2 and 3, unlike ``.iteritems().next()``.
        analysis_cache_dir, analysis_cache_files = next(iter(analysis_cache_product.items()))
        if len(analysis_cache_files) != 1:
            raise TaskError('There can only be one analysis cache file per output directory')
        return os.path.join(analysis_cache_dir, analysis_cache_files[0])

    # Strings of <output dir>:<full path to analysis cache file for the classes in that dir>.
    analysis_map = OrderedDict(
        (classes_dir, analysis_cache_full_path(product))
        for classes_dir, product in upstream_analysis_caches.itermappings())

    if analysis_map:
        args.extend(['-analysis-map',
                     ','.join('%s:%s' % kv for kv in analysis_map.items())])
    # A real list is required: on Python 3 ``keys()`` is a view and cannot be added to a list.
    upstream_classes_dirs = list(analysis_map)

    zinc_classpath = nailgun_profile_classpath(self, self._zinc_profile)
    zinc_jars = ScalaCompile.identify_zinc_jars(compiler_classpath, zinc_classpath)
    for name, jarpath in zinc_jars.items():
        # The zinc jar names are also the flag names.
        args.extend(['-%s' % name, jarpath])

    args.extend([
        '-analysis-cache', analysis_cache,
        '-log-level', self.context.options.log_level or 'info',
        '-classpath', ':'.join(zinc_classpath + classpath + upstream_classes_dirs),
        '-d', output_dir,
    ])

    if not self._color:
        args.append('-no-color')

    args.extend(sources)

    self.context.log.debug('Executing: %s %s' % (self._main, ' '.join(args)))
    return self.runjava(self._main, classpath=zinc_classpath, args=args,
                        jvmargs=self._jvm_args)
def attempt(self, timer, explain):
    """Run every goal installed for this phase, or only describe the plan when ``explain`` is set.

    Goals that declare a group are batched by group name (in first-seen order) and run once
    per target chunk; ungrouped goals run once against ``self._context.targets()``.

    :param timer: object providing a ``timed(goal)`` context manager wrapped around each goal.
    :param explain: when truthy, ``task.execute`` is skipped and a ``phase [goal->task, ...]``
      summary line is printed for what would have run.
    :raises TaskError: if the phase has no goals installed.
    """

    def execute_task(goal, task, targets):
        """Execute and time a single goal that has had all of its dependencies satisfied."""
        with timer.timed(goal):
            # TODO (Senthil Kumaran):
            # Possible refactoring of the Task Execution Logic (AWESOME-1019)
            if explain:
                self._context.log.debug("Skipping execution of %s in explain mode" % goal.name)
            else:
                task.execute(targets)

    goals = self._phase.goals()
    if not goals:
        raise TaskError('No goals installed for phase %s' % self._phase)

    # Each entry is (group_name or None, [goals]); grouped goals share one entry per group.
    run_queue = []
    goals_by_group = {}
    for goal in goals:
        if not goal.group:
            run_queue.append((None, [goal]))
            continue
        group_name = goal.group.name
        if group_name in goals_by_group:
            goals_by_group[group_name].append(goal)
        else:
            group_goals = [goal]
            run_queue.append((group_name, group_goals))
            goals_by_group[group_name] = group_goals

    with self._context.new_workunit(name=self._phase.name, labels=[WorkUnit.PHASE]):
        # OrderedSet takes care of not repeating chunked task execution mentions
        execution_phases = defaultdict(OrderedSet)

        for group_name, queued_goals in run_queue:
            if not group_name:
                goal = queued_goals[0]
                execution_phases[self._phase].add(goal.name)
                with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
                    execute_task(goal, self._tasks_by_goal[goal], self._context.targets())
                continue

            with self._context.new_workunit(name=group_name, labels=[WorkUnit.GROUP]):
                goals_by_group_member = OrderedDict(
                    (GroupMember.from_goal(g), g) for g in queued_goals)

                # First, divide the set of all targets to be built into compatible chunks, based
                # on their declared exclusives. Then, for each chunk of compatible exclusives, do
                # further sub-chunking. At the end, we'll have a list of chunks to be built,
                # which will go through the chunks of each exclusives-compatible group separately.

                # TODO(markcc); chunks with incompatible exclusives require separate ivy resolves.
                # Either interleave the ivy task in this group so that it runs once for each
                # batch of chunks with compatible exclusives, or make the compilation tasks do
                # their own ivy resolves for each batch of targets they're asked to compile.

                goal_chunks = []

                # We won't have exclusives calculated if stopping short for example during an
                # explain.
                if explain:
                    exclusive_chunks = [self._context.targets()]
                else:
                    exclusive_chunks = ExclusivesIterator.from_context(self._context)

                for exclusive_chunk in exclusive_chunks:
                    # TODO(Travis Crawford): Targets should be filtered by is_concrete rather
                    # than is_internal, however, at this time python targets are not internal
                    # targets.
                    internal_targets = [t for t in exclusive_chunk if t.is_internal]
                    group_chunks = GroupIterator(internal_targets,
                                                 list(goals_by_group_member.keys()))
                    goal_chunks.extend(group_chunks)

                self._context.log.debug('::: created chunks(%d)' % len(goal_chunks))
                for i, (group_member, goal_chunk) in enumerate(goal_chunks):
                    self._context.log.debug(' chunk(%d) [flavor=%s]:\n\t%s' % (
                        i, group_member.name, '\n\t'.join(sorted(map(str, goal_chunk)))))

                for group_member, goal_chunk in goal_chunks:
                    goal = goals_by_group_member[group_member]
                    execution_phases[self._phase].add((group_name, goal.name))
                    with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
                        execute_task(goal, self._tasks_by_goal[goal], goal_chunk)

        if explain:
            tasks_by_goalname = dict((goal.name, task.__class__.__name__)
                                     for goal, task in self._tasks_by_goal.items())

            def expand_goal(goal):
                # Entries are either a bare goal name or a (group, goal) tuple. Test the type
                # rather than the length: a two-character goal name also has len() == 2.
                if isinstance(goal, tuple):
                    group_name, goal_name = goal
                    task_name = tasks_by_goalname[goal_name]
                    return "%s:%s->%s" % (group_name, goal_name, task_name)
                task_name = tasks_by_goalname[goal]
                return "%s->%s" % (goal, task_name)

            for phase, phase_goals in execution_phases.items():
                goal_to_task = ", ".join(expand_goal(goal) for goal in phase_goals)
                print("%s [%s]" % (phase, goal_to_task))
def attempt(self, timer, explain):
    """Run every goal installed for this phase, or only describe the plan when ``explain`` is set.

    Goals that declare a group are batched by group name (in first-seen order) and run once
    per target chunk; ungrouped goals run once against ``self._context.targets()``.

    :param timer: object providing a ``timed(goal)`` context manager wrapped around each goal.
    :param explain: when truthy, ``task.execute`` is skipped and a ``phase [goal->task, ...]``
      summary line is printed for what would have run.
    :raises TaskError: if the phase has no goals installed.
    """

    def execute_task(goal, task, targets):
        """Execute and time a single goal that has had all of its dependencies satisfied."""
        with timer.timed(goal):
            # TODO (Senthil Kumaran):
            # Possible refactoring of the Task Execution Logic (AWESOME-1019)
            if explain:
                self._context.log.debug("Skipping execution of %s in explain mode" % goal.name)
            else:
                task.execute(targets)

    goals = self._phase.goals()
    if not goals:
        raise TaskError('No goals installed for phase %s' % self._phase)

    # Each entry is (group_name or None, [goals]); grouped goals share one entry per group.
    run_queue = []
    goals_by_group = {}
    for goal in goals:
        if not goal.group:
            run_queue.append((None, [goal]))
            continue
        group_name = goal.group.name
        if group_name in goals_by_group:
            goals_by_group[group_name].append(goal)
        else:
            group_goals = [goal]
            run_queue.append((group_name, group_goals))
            goals_by_group[group_name] = group_goals

    with self._context.new_workunit(name=self._phase.name, labels=[WorkUnit.PHASE]):
        # OrderedSet takes care of not repeating chunked task execution mentions
        execution_phases = defaultdict(OrderedSet)

        for group_name, queued_goals in run_queue:
            if not group_name:
                goal = queued_goals[0]
                execution_phases[self._phase].add(goal.name)
                with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
                    execute_task(goal, self._tasks_by_goal[goal], self._context.targets())
                continue

            with self._context.new_workunit(name=group_name, labels=[WorkUnit.GROUP]):
                goals_by_group_member = OrderedDict(
                    (GroupMember.from_goal(g), g) for g in queued_goals)

                # First, divide the set of all targets to be built into compatible chunks, based
                # on their declared exclusives. Then, for each chunk of compatible exclusives, do
                # further sub-chunking. At the end, we'll have a list of chunks to be built,
                # which will go through the chunks of each exclusives-compatible group separately.

                # TODO(markcc); chunks with incompatible exclusives require separate ivy resolves.
                # Either interleave the ivy task in this group so that it runs once for each
                # batch of chunks with compatible exclusives, or make the compilation tasks do
                # their own ivy resolves for each batch of targets they're asked to compile.

                goal_chunks = []

                # We won't have exclusives calculated if stopping short for example during an
                # explain.
                if explain:
                    exclusive_chunks = [self._context.targets()]
                else:
                    exclusive_chunks = ExclusivesIterator.from_context(self._context)

                for exclusive_chunk in exclusive_chunks:
                    # TODO(Travis Crawford): Targets should be filtered by is_concrete rather
                    # than is_internal, however, at this time python targets are not internal
                    # targets.
                    internal_targets = [t for t in exclusive_chunk if t.is_internal]
                    group_chunks = GroupIterator(internal_targets,
                                                 list(goals_by_group_member.keys()))
                    goal_chunks.extend(group_chunks)

                self._context.log.debug('::: created chunks(%d)' % len(goal_chunks))
                for i, (group_member, goal_chunk) in enumerate(goal_chunks):
                    self._context.log.debug(' chunk(%d) [flavor=%s]:\n\t%s' % (
                        i, group_member.name, '\n\t'.join(sorted(map(str, goal_chunk)))))

                for group_member, goal_chunk in goal_chunks:
                    goal = goals_by_group_member[group_member]
                    execution_phases[self._phase].add((group_name, goal.name))
                    with self._context.new_workunit(name=goal.name, labels=[WorkUnit.GOAL]):
                        execute_task(goal, self._tasks_by_goal[goal], goal_chunk)

        if explain:
            tasks_by_goalname = dict((goal.name, task.__class__.__name__)
                                     for goal, task in self._tasks_by_goal.items())

            def expand_goal(goal):
                # Entries are either a bare goal name or a (group, goal) tuple. Test the type
                # rather than the length: a two-character goal name also has len() == 2.
                if isinstance(goal, tuple):
                    group_name, goal_name = goal
                    task_name = tasks_by_goalname[goal_name]
                    return "%s:%s->%s" % (group_name, goal_name, task_name)
                task_name = tasks_by_goalname[goal]
                return "%s->%s" % (goal, task_name)

            for phase, phase_goals in execution_phases.items():
                goal_to_task = ", ".join(expand_goal(goal) for goal in phase_goals)
                print("%s [%s]" % (phase, goal_to_task))
class PerPathDatapoints(Thread):
    """Thread that buckets incoming requests by second and periodically purges old buckets.

    Requests are stored in an ``OrderedDict`` keyed by ``int(time.time())`` at arrival, so
    buckets are kept in insertion (chronological) order. All access to that dict is
    serialized through ``self._lock``.
    """

    PURGE_SLEEP_TIME = 2  # sleep time between purging old datapoints
    DEFAULT_TOP_RESULTS = 10  # number of (top) results to show by default

    def __init__(self, older_than=120, aggregation_depth=0):
        """
        datapoints that are `older_than` (seconds) will be dropped
        if aggregation_depth > 0 then we aggregate for paths up to that depth
        """
        self._older_than = older_than
        self._aggregation_depth = aggregation_depth
        self._requests_by_timestamp = OrderedDict()
        self._lock = Lock()
        super(PerPathDatapoints, self).__init__()

    def size(self):
        """ return a dict with the sample count and formatted memory-usage figures """
        with self._lock:
            samples, mem_usage = 0, 0
            for reqs in self._requests_by_timestamp.values():
                samples += len(reqs)
                # sys.getsizeof is shallow: it does not follow references held by a request
                mem_usage += sum(sys.getsizeof(r) for r in reqs)

        return {
            "samples": samples,
            "requests_mem_usage": sizeof_fmt(mem_usage),
            "ordered_dict_mem_usage": sizeof_fmt(sys.getsizeof(self._requests_by_timestamp)),
        }

    def run(self):
        """ drop samples that are too old """
        while True:
            time.sleep(self.PURGE_SLEEP_TIME)
            old_tstamp = time.time() - self._older_than
            with self._lock:
                # Collect the expired keys first: deleting while iterating the dict itself
                # raises RuntimeError on Python 3.
                expired = [t for t in self._requests_by_timestamp if t < old_tstamp]
                for tstamp in expired:
                    del self._requests_by_timestamp[tstamp]

    def handle_request(self, request):
        """ store `request` in the bucket for the current second """
        if self._aggregation_depth > 0:
            # `intern` is a builtin on Python 2 and lives in `sys` on Python 3
            intern_str = getattr(sys, "intern", None) or intern
            request.path = intern_str(request.parent_path(self._aggregation_depth))

        with self._lock:
            tstamp = int(time.time())
            self._requests_by_timestamp.setdefault(tstamp, []).append(request)

    def sum_minute(self,
                   top=DEFAULT_TOP_RESULTS,
                   order_by=Counters.WRITES,
                   display=[Counters.ALL],
                   view=AccumulatedStats.VIEW_BY_PATH):
        """
        accumulate stats over the requests seen in the last NUMBER_OF_DATAPOINTS seconds
        and return them via AccumulatedStats.dict()

        NOTE: `display` defaults to a shared list; it is only passed through to
        AccumulatedStats.dict() here.
        """
        now = int(time.time())
        old = now - NUMBER_OF_DATAPOINTS
        stats = AccumulatedStats(StatsConfig())

        with self._lock:
            # note that this is an OrderedDict so samples are in chronological order
            for tstamp, reqs in self._requests_by_timestamp.items():
                if tstamp < old:
                    continue
                if tstamp > now:
                    break
                for r in reqs:
                    stats.handle_request(r)

        return stats.dict(top=top,
                          order_by=order_by,
                          display_filters=display,
                          view=view)

    def datapoints_writes(self):
        """ per-second counts of write requests """
        return self._filter_datapoints(condition=lambda req: req.is_write)

    def datapoints_reads(self):
        """ per-second counts of non-write requests """
        return self._filter_datapoints(condition=lambda req: not req.is_write)

    def datapoints_for_op(self, op):
        """ per-second counts of requests whose opcode equals `op` """
        return self._filter_datapoints(condition=lambda req: req.opcode == op)

    def datapoints_by_path_for_op(self, op, top):
        """
        op is "writes" or "reads" or one of OpCodes.CREATE, OpCodes.SETDATA, etc.
        top is the number of results
        """
        if op == "writes":
            request_filter = lambda r: r.is_write
        elif op == "reads":
            request_filter = lambda r: not r.is_write
        else:
            request_filter = lambda r: r.opcode == op
        return self._datapoints_by_path_for_op_impl(request_filter, top)

    def _datapoints_by_path_for_op_impl(self, request_filter, top):
        """
        to make this moderately efficient we use a dict that
        provides a pre-populated list of datapoints.

        returns up to `top` (path, datapoints) pairs, busiest path first; when no
        request matched, a single entry for "/" is returned.
        """
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = PathDatapoints()
        with self._lock:
            for i in range(0, NUMBER_OF_DATAPOINTS):
                for req in self._requests_by_timestamp.get(tstamp, ()):
                    if request_filter(req):
                        dp = datapoints[req.path][i][1] + 1
                        datapoints[req.path][i] = (i, dp)
                tstamp += 1

        # Sort by total count, highest first. Totals are computed once per path and fed to a
        # key function; the positional cmp argument of sorted() no longer exists on Python 3.
        # reverse=True keeps the sort stable, so ties keep the order of datapoints.keys().
        paths = list(datapoints.keys())
        totals = dict((p, sum(d[1] for d in datapoints[p])) for p in paths)
        paths = sorted(paths, key=totals.__getitem__, reverse=True)

        if not paths:
            return [("/", datapoints["/"])]

        return [(p, datapoints[p]) for p in paths[0:top]]

    def _filter_datapoints(self, condition):
        """
        return NUMBER_OF_DATAPOINTS (index, count) tuples, one per second, counting the
        requests for which `condition` is truthy
        """
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = []
        # hold the lock like every other reader, since run() and handle_request()
        # mutate the dict from other threads
        with self._lock:
            for i in range(0, NUMBER_OF_DATAPOINTS):
                aggregate = sum(bool(condition(req))
                                for req in self._requests_by_timestamp.get(tstamp, []))
                datapoints.append((i, aggregate))
                tstamp += 1

        return datapoints
class PerPathDatapoints(Thread):
    """Thread that buckets incoming requests by second and periodically purges old buckets.

    Requests are stored in an ``OrderedDict`` keyed by ``int(time.time())`` at arrival, so
    buckets are kept in insertion (chronological) order. All access to that dict is
    serialized through ``self._lock``.
    """

    PURGE_SLEEP_TIME = 2  # sleep time between purging old datapoints
    DEFAULT_TOP_RESULTS = 10  # number of (top) results to show by default

    def __init__(self, older_than=120, aggregation_depth=0):
        """
        datapoints that are `older_than` (seconds) will be dropped
        if aggregation_depth > 0 then we aggregate for paths up to that depth
        """
        self._older_than = older_than
        self._aggregation_depth = aggregation_depth
        self._requests_by_timestamp = OrderedDict()
        self._lock = Lock()
        super(PerPathDatapoints, self).__init__()

    def size(self):
        """ return a dict with the sample count and formatted memory-usage figures """
        with self._lock:
            samples, mem_usage = 0, 0
            for reqs in self._requests_by_timestamp.values():
                samples += len(reqs)
                # sys.getsizeof is shallow: it does not follow references held by a request
                mem_usage += sum(sys.getsizeof(r) for r in reqs)

        return {
            "samples": samples,
            "requests_mem_usage": sizeof_fmt(mem_usage),
            "ordered_dict_mem_usage": sizeof_fmt(sys.getsizeof(self._requests_by_timestamp)),
        }

    def run(self):
        """ drop samples that are too old """
        while True:
            time.sleep(self.PURGE_SLEEP_TIME)
            old_tstamp = time.time() - self._older_than
            with self._lock:
                # Collect the expired keys first: deleting while iterating the dict itself
                # raises RuntimeError on Python 3.
                expired = [t for t in self._requests_by_timestamp if t < old_tstamp]
                for tstamp in expired:
                    del self._requests_by_timestamp[tstamp]

    def handle_request(self, request):
        """ store `request` in the bucket for the current second """
        if self._aggregation_depth > 0:
            # `intern` is a builtin on Python 2 and lives in `sys` on Python 3
            intern_str = getattr(sys, "intern", None) or intern
            request.path = intern_str(request.parent_path(self._aggregation_depth))

        with self._lock:
            tstamp = int(time.time())
            self._requests_by_timestamp.setdefault(tstamp, []).append(request)

    def sum_minute(self,
                   top=DEFAULT_TOP_RESULTS,
                   order_by=Counters.WRITES,
                   display=[Counters.ALL],
                   view=AccumulatedStats.VIEW_BY_PATH):
        """
        accumulate stats over the requests seen in the last NUMBER_OF_DATAPOINTS seconds
        and return them via AccumulatedStats.dict()

        NOTE: `display` defaults to a shared list; it is only passed through to
        AccumulatedStats.dict() here.
        """
        now = int(time.time())
        old = now - NUMBER_OF_DATAPOINTS
        stats = AccumulatedStats(StatsConfig())

        with self._lock:
            # note that this is an OrderedDict so samples are in chronological order
            for tstamp, reqs in self._requests_by_timestamp.items():
                if tstamp < old:
                    continue
                if tstamp > now:
                    break
                for r in reqs:
                    stats.handle_request(r)

        return stats.dict(top=top,
                          order_by=order_by,
                          display_filters=display,
                          view=view)

    def datapoints_writes(self):
        """ per-second counts of write requests """
        return self._filter_datapoints(condition=lambda req: req.is_write)

    def datapoints_reads(self):
        """ per-second counts of non-write requests """
        return self._filter_datapoints(condition=lambda req: not req.is_write)

    def datapoints_for_op(self, op):
        """ per-second counts of requests whose opcode equals `op` """
        return self._filter_datapoints(condition=lambda req: req.opcode == op)

    def datapoints_by_path_for_op(self, op, top):
        """
        op is "writes" or "reads" or one of OpCodes.CREATE, OpCodes.SETDATA, etc.
        top is the number of results
        """
        if op == "writes":
            request_filter = lambda r: r.is_write
        elif op == "reads":
            request_filter = lambda r: not r.is_write
        else:
            request_filter = lambda r: r.opcode == op
        return self._datapoints_by_path_for_op_impl(request_filter, top)

    def _datapoints_by_path_for_op_impl(self, request_filter, top):
        """
        to make this moderately efficient we use a dict that
        provides a pre-populated list of datapoints.

        returns up to `top` (path, datapoints) pairs, busiest path first; when no
        request matched, a single entry for "/" is returned.
        """
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = PathDatapoints()
        with self._lock:
            for i in range(0, NUMBER_OF_DATAPOINTS):
                for req in self._requests_by_timestamp.get(tstamp, ()):
                    if request_filter(req):
                        dp = datapoints[req.path][i][1] + 1
                        datapoints[req.path][i] = (i, dp)
                tstamp += 1

        # Sort by total count, highest first. Totals are computed once per path and fed to a
        # key function; the positional cmp argument of sorted() no longer exists on Python 3.
        # reverse=True keeps the sort stable, so ties keep the order of datapoints.keys().
        paths = list(datapoints.keys())
        totals = dict((p, sum(d[1] for d in datapoints[p])) for p in paths)
        paths = sorted(paths, key=totals.__getitem__, reverse=True)

        if not paths:
            return [("/", datapoints["/"])]

        return [(p, datapoints[p]) for p in paths[0:top]]

    def _filter_datapoints(self, condition):
        """
        return NUMBER_OF_DATAPOINTS (index, count) tuples, one per second, counting the
        requests for which `condition` is truthy
        """
        tstamp = int(time.time()) - NUMBER_OF_DATAPOINTS
        datapoints = []
        # hold the lock like every other reader, since run() and handle_request()
        # mutate the dict from other threads
        with self._lock:
            for i in range(0, NUMBER_OF_DATAPOINTS):
                aggregate = sum(bool(condition(req))
                                for req in self._requests_by_timestamp.get(tstamp, []))
                datapoints.append((i, aggregate))
                tstamp += 1

        return datapoints