def build_summary_table(self, summary, idx, is_fragment_root, indent_level,
                        new_indent_level, output):
    """Recursively build the rows of the exec summary table, one per exec node.

    Direct translation of Coordinator::PrintExecSummary().

    summary: the TExecSummary object that contains all the summary data
    idx: the index in summary.nodes of the node to print
    is_fragment_root: true if the node to print is the root of a fragment (and
      therefore feeds into an exchange). Currently not read by this method.
    indent_level: the nesting depth used to build the "|" tree prefix in front
      of the node's label. The 0th child of a node has the same indent_level as
      its parent. All other children have an indent_level of one greater than
      their parent.
    new_indent_level: if true, the prefix ends in "--" to mark the start of a
      new branch; otherwise it is padded with a space.
    output: the list of rows into which to append the rows produced for this
      node and its children. Each row is a list:
      [label, #hosts, avg time, max time, #rows, est. #rows, peak mem,
       est. peak mem, detail], with the numeric columns already formatted as
      human-readable strings.

    Returns the index of the next exec node in summary.nodes that should be
    processed, used internally to this method only.

    NOTE: This is duplicated in impala_beeswax.py, and changes made here should
    also be made there.
    TODO: refactor into a shared library. (IMPALA-5792)
    """
    attrs = ("latency_ns", "cpu_time_ns", "cardinality", "memory_used")

    node = summary.nodes[idx]
    # exec_stats may be unset (None); treat that the same as "no instances".
    exec_stats = node.exec_stats or []
    hosts = len(exec_stats)

    # Aggregate (sum) and maximum of every stat over all instances of the node.
    agg_stats = dict.fromkeys(attrs, 0)
    max_stats = dict.fromkeys(attrs, 0)
    for stats in exec_stats:
        for attr in attrs:
            val = getattr(stats, attr)
            if val is not None:
                agg_stats[attr] += val
                max_stats[attr] = max(max_stats[attr], val)

    avg_time = agg_stats["latency_ns"] / hosts if hosts else 0

    # If the node is a broadcast-receiving exchange node, the cardinality of
    # rows produced is the max over all instances (which should all have
    # received the same number of rows). Otherwise, the cardinality is the sum
    # over all instances which process disjoint partitions.
    if node.is_broadcast:
        cardinality = max_stats["cardinality"]
    else:
        cardinality = agg_stats["cardinality"]

    est_stats = node.estimated_stats

    label_prefix = ""
    if indent_level > 0:
        label_prefix = "|"
        label_prefix += " |" * (indent_level - 1)
        if new_indent_level:
            label_prefix += "--"
        else:
            label_prefix += " "

    def prettyprint(val, units, divisor):
        """Scale val down by divisor until it fits a unit, then format it.

        Values too large for even the last unit are still reported in that
        last unit instead of falling off the end of the loop (which used to
        yield None).
        """
        last = len(units) - 1
        for pos, unit in enumerate(units):
            if val < divisor or pos == last:
                if pos == 0:
                    # The base unit is printed as a whole number.
                    return "%d%s" % (val, unit)
                return "%3.2f%s" % (val, unit)
            val /= divisor

    def prettyprint_bytes(byte_val):
        return prettyprint(byte_val, [' B', ' KB', ' MB', ' GB', ' TB'], 1024.0)

    def prettyprint_units(unit_val):
        return prettyprint(unit_val, ["", "K", "M", "B"], 1000.0)

    def prettyprint_time(time_val):
        return prettyprint(time_val, ["ns", "us", "ms", "s"], 1000.0)

    row = [
        label_prefix + node.label,
        hosts,
        prettyprint_time(avg_time),
        prettyprint_time(max_stats["latency_ns"]),
        prettyprint_units(cardinality),
        prettyprint_units(est_stats.cardinality),
        prettyprint_bytes(max_stats["memory_used"]),
        prettyprint_bytes(est_stats.memory_used),
        node.label_detail,
    ]
    output.append(row)

    try:
        sender_idx = summary.exch_to_sender_map[idx]
        # This is an exchange node, so the sender is a fragment root, and
        # should be printed next.
        self.build_summary_table(summary, sender_idx, True, indent_level, False,
                                 output)
    except (KeyError, TypeError):
        # Fall through if idx not in map, or if exch_to_sender_map itself is
        # not set. The recursive call above sits inside this try as well, so
        # these two exception types raised while printing the sender are also
        # swallowed here.
        pass

    idx += 1
    if node.num_children > 0:
        # The first child is rendered into a side buffer so that it can be
        # appended *after* all the other children.
        first_child_output = []
        idx = self.build_summary_table(
            summary, idx, False, indent_level, False, first_child_output)
        for _ in range(1, node.num_children):
            # All other children are indented (we only have 0, 1 or 2 children
            # for every exec node at the moment)
            idx = self.build_summary_table(
                summary, idx, False, indent_level + 1, True, output)
        output += first_child_output
    return idx
def __build_summary_table(self, summary, idx, is_fragment_root, indent_level,
                          new_indent_level, output):
    """Recursively build the rows of the exec summary, one dict per exec node.

    NOTE: This was taken from impala_shell.py. This method will be placed in a
    library that is shared between impala_shell and this file.

    Direct translation of Coordinator::PrintExecSummary().

    summary: the TExecSummary object that contains all the summary data
    idx: the index in summary.nodes of the node to print
    is_fragment_root: true if the node to print is the root of a fragment (and
      therefore feeds into an exchange)
    indent_level: the nesting depth used to build the "|" tree prefix for the
      node's label. The 0th child of a node has the same indent_level as its
      parent. All other children have an indent_level of one greater than their
      parent.
    new_indent_level: If true, this indent level is different from the previous
      row's.
    output: the list of rows into which to append the rows produced for this
      node and its children. Each row is a dict with the keys "prefix",
      "operator", "num_hosts", "avg_time", "max_time", "num_rows",
      "est_num_rows", "peak_mem", "est_peak_mem" and "detail"; the numeric
      values are left unformatted.

    Returns the index of the next exec node in summary.nodes that should be
    processed, used internally to this method only.
    """
    attrs = ("latency_ns", "cpu_time_ns", "cardinality", "memory_used")

    node = summary.nodes[idx]
    # exec_stats may be unset (None); treat that the same as "no instances"
    # instead of failing on iteration / len().
    exec_stats = node.exec_stats or []
    num_hosts = len(exec_stats)

    # Aggregate (sum) and maximum of every stat over all instances of the node.
    agg_stats = dict.fromkeys(attrs, 0)
    max_stats = dict.fromkeys(attrs, 0)
    for stats in exec_stats:
        for attr in attrs:
            val = getattr(stats, attr)
            if val is not None:
                agg_stats[attr] += val
                max_stats[attr] = max(max_stats[attr], val)

    avg_time = agg_stats["latency_ns"] / num_hosts if num_hosts else 0

    # If the node is a broadcast-receiving exchange node, the cardinality of
    # rows produced is the max over all instances (which should all have
    # received the same number of rows). Otherwise, the cardinality is the sum
    # over all instances which process disjoint partitions.
    # NOTE(review): the build_summary_table copy tests only node.is_broadcast
    # here, without is_fragment_root - confirm which condition is intended.
    if node.is_broadcast and is_fragment_root:
        cardinality = max_stats["cardinality"]
    else:
        cardinality = agg_stats["cardinality"]

    est_stats = node.estimated_stats

    label_prefix = ""
    if indent_level > 0:
        label_prefix = "|"
        label_prefix += " |" * (indent_level - 1)
        if new_indent_level:
            label_prefix += "--"
        else:
            label_prefix += " "

    row = {}
    row["prefix"] = label_prefix
    row["operator"] = node.label
    row["num_hosts"] = num_hosts
    row["avg_time"] = avg_time
    row["max_time"] = max_stats["latency_ns"]
    row["num_rows"] = cardinality
    row["est_num_rows"] = est_stats.cardinality
    row["peak_mem"] = max_stats["memory_used"]
    row["est_peak_mem"] = est_stats.memory_used
    row["detail"] = node.label_detail
    output.append(row)

    try:
        sender_idx = summary.exch_to_sender_map[idx]
        # This is an exchange node, so the sender is a fragment root, and
        # should be printed next.
        self.__build_summary_table(summary, sender_idx, True, indent_level,
                                   False, output)
    except (KeyError, TypeError):
        # Fall through if idx not in map, or if exch_to_sender_map itself is
        # not set. The recursive call above sits inside this try as well, so
        # these two exception types raised while processing the sender are
        # also swallowed here.
        pass

    idx += 1
    if node.num_children > 0:
        # The first child is rendered into a side buffer so that it can be
        # appended *after* all the other children.
        first_child_output = []
        idx = self.__build_summary_table(
            summary, idx, False, indent_level, False, first_child_output)
        for _ in range(1, node.num_children):
            # All other children are indented (we only have 0, 1 or 2 children
            # for every exec node at the moment)
            idx = self.__build_summary_table(
                summary, idx, False, indent_level + 1, True, output)
        output += first_child_output
    return idx