Example #1
0
    def build_summary_table(self, summary, idx, is_fragment_root, indent_level,
                            new_indent_level, output):
        """Direct translation of Coordinator::PrintExecSummary() to recursively
        build a list of rows of summary statistics, one per exec node.

        summary: the TExecSummary object that contains all the summary data

        idx: the index of the node to print

        is_fragment_root: true if the node to print is the root of a fragment (and
        therefore feeds into an exchange)

        indent_level: the number of spaces to print before writing the node's label,
        to give the appearance of a tree. The 0th child of a node has the same
        indent_level as its parent. All other children have an indent_level of one
        greater than their parent.

        new_indent_level: If true, this indent level is different from the previous
        row's.

        output: the list of rows into which to append the rows produced for this
        node and its children.

        Returns the index of the next exec node in summary.exec_nodes that should be
        processed, used internally to this method only.

        NOTE: This is duplicated in impala_beeswax.py, and changes made here should
        also be made there. TODO: refactor into a shared library. (IMPALA-5792)
        """
        attrs = ["latency_ns", "cpu_time_ns", "cardinality", "memory_used"]

        # Initialise aggregate and maximum stats
        agg_stats, max_stats = TExecStats(), TExecStats()
        for attr in attrs:
            setattr(agg_stats, attr, 0)
            setattr(max_stats, attr, 0)

        node = summary.nodes[idx]
        # exec_stats may be None; skip aggregation in that case.
        if node.exec_stats is not None:
            for stats in node.exec_stats:
                for attr in attrs:
                    val = getattr(stats, attr)
                    if val is not None:
                        setattr(agg_stats, attr,
                                getattr(agg_stats, attr) + val)
                        setattr(max_stats, attr,
                                max(getattr(max_stats, attr), val))

        # Average latency across instances; 0 when there are no exec stats.
        # ('if x' already covers both None and empty, so the former
        # 'is not None and x' double test is unnecessary.)
        if node.exec_stats:
            avg_time = agg_stats.latency_ns / len(node.exec_stats)
        else:
            avg_time = 0

        # If the node is a broadcast-receiving exchange node, the cardinality of rows
        # produced is the max over all instances (which should all have received the
        # same number of rows). Otherwise, the cardinality is the sum over all
        # instances which process disjoint partitions.
        # NOTE(review): the impala_beeswax.py copy additionally requires
        # is_fragment_root in this condition (and is_fragment_root is otherwise
        # unused here) — confirm which variant is intended.
        if node.is_broadcast:
            cardinality = max_stats.cardinality
        else:
            cardinality = agg_stats.cardinality

        est_stats = node.estimated_stats

        # Build the tree-drawing prefix for this row's label, e.g. "|  |--".
        label_prefix = ""
        if indent_level > 0:
            label_prefix = "|"
            label_prefix += "  |" * (indent_level - 1)
            if new_indent_level:
                label_prefix += "--"
            else:
                label_prefix += "  "

        def prettyprint(val, units, divisor):
            """Scale val into the largest unit for which it is < divisor.

            Values too large even for the last unit are rendered in that unit
            (the original fell off the loop and returned None for such values)."""
            for i, unit in enumerate(units):
                if val < divisor or i == len(units) - 1:
                    if i == 0:
                        return "%d%s" % (val, unit)
                    return "%3.2f%s" % (val, unit)
                val /= divisor

        def prettyprint_bytes(byte_val):
            return prettyprint(byte_val, [' B', ' KB', ' MB', ' GB', ' TB'],
                               1024.0)

        def prettyprint_units(unit_val):
            return prettyprint(unit_val, ["", "K", "M", "B"], 1000.0)

        def prettyprint_time(time_val):
            return prettyprint(time_val, ["ns", "us", "ms", "s"], 1000.0)

        hosts = len(node.exec_stats) if node.exec_stats is not None else 0
        row = [
            label_prefix + node.label, hosts,
            prettyprint_time(avg_time),
            prettyprint_time(max_stats.latency_ns),
            prettyprint_units(cardinality),
            prettyprint_units(est_stats.cardinality),
            prettyprint_bytes(max_stats.memory_used),
            prettyprint_bytes(est_stats.memory_used), node.label_detail
        ]

        output.append(row)
        try:
            sender_idx = summary.exch_to_sender_map[idx]
            # This is an exchange node, so the sender is a fragment root, and should
            # be printed next.
            self.build_summary_table(summary, sender_idx, True, indent_level,
                                     False, output)
        except (KeyError, TypeError):
            # Fall through if idx not in map, or if exch_to_sender_map itself is not set
            pass

        idx += 1
        if node.num_children > 0:
            # The 0th child keeps this node's indent level; its rows are buffered
            # and appended after the indented siblings so it is printed last.
            first_child_output = []
            idx = self.build_summary_table(
                summary, idx, False, indent_level, False, first_child_output)
            # 'range' (not Python-2-only 'xrange') keeps this Py2/Py3 compatible.
            for child_idx in range(1, node.num_children):
                # All other children are indented (we only have 0, 1 or 2 children for every exec
                # node at the moment)
                idx = self.build_summary_table(summary, idx, False,
                                               indent_level + 1, True, output)
            output += first_child_output
        return idx
Example #2
0
  def __build_summary_table(self, summary, idx, is_fragment_root, indent_level,
      new_indent_level, output):
    """NOTE: This was taken from impala_shell.py. This method will be placed in a
    library that is shared between impala_shell and this file.

    Direct translation of Coordinator::PrintExecSummary() to recursively build a list
    of rows of summary statistics, one per exec node

    summary: the TExecSummary object that contains all the summary data

    idx: the index of the node to print

    is_fragment_root: true if the node to print is the root of a fragment (and therefore
    feeds into an exchange)

    indent_level: the number of spaces to print before writing the node's label, to give
    the appearance of a tree. The 0th child of a node has the same indent_level as its
    parent. All other children have an indent_level of one greater than their parent.

    new_indent_level: If true, this indent level is different from the previous row's.

    output: the list of rows into which to append the rows produced for this node and its
    children.

    Returns the index of the next exec node in summary.exec_nodes that should be
    processed, used internally to this method only.
    """
    attrs = ["latency_ns", "cpu_time_ns", "cardinality", "memory_used"]

    # Initialise aggregate and maximum stats
    agg_stats, max_stats = TExecStats(), TExecStats()
    for attr in attrs:
      setattr(agg_stats, attr, 0)
      setattr(max_stats, attr, 0)

    node = summary.nodes[idx]
    # exec_stats may be None (the impala_shell.py copy guards for this); treat
    # that the same as an empty list so iteration and len() below are safe.
    exec_stats = node.exec_stats or []
    for stats in exec_stats:
      for attr in attrs:
        val = getattr(stats, attr)
        if val is not None:
          setattr(agg_stats, attr, getattr(agg_stats, attr) + val)
          setattr(max_stats, attr, max(getattr(max_stats, attr), val))

    # Average latency across instances; 0 when there are no exec stats.
    if len(exec_stats) > 0:
      avg_time = agg_stats.latency_ns / len(exec_stats)
    else:
      avg_time = 0

    # If the node is a broadcast-receiving exchange node, the cardinality of rows produced
    # is the max over all instances (which should all have received the same number of
    # rows). Otherwise, the cardinality is the sum over all instances which process
    # disjoint partitions.
    if node.is_broadcast and is_fragment_root:
      cardinality = max_stats.cardinality
    else:
      cardinality = agg_stats.cardinality

    est_stats = node.estimated_stats

    # Build the tree-drawing prefix for this row's label, e.g. "|  |--".
    label_prefix = ""
    if indent_level > 0:
      label_prefix = "|"
      label_prefix += "  |" * (indent_level - 1)
      if new_indent_level:
        label_prefix += "--"
      else:
        label_prefix += "  "

    row = {}
    row["prefix"] = label_prefix
    row["operator"] = node.label
    row["num_hosts"] = len(exec_stats)
    row["avg_time"] = avg_time
    row["max_time"] = max_stats.latency_ns
    row["num_rows"] = cardinality
    row["est_num_rows"] = est_stats.cardinality
    row["peak_mem"] = max_stats.memory_used
    row["est_peak_mem"] = est_stats.memory_used
    row["detail"] = node.label_detail
    output.append(row)

    try:
      sender_idx = summary.exch_to_sender_map[idx]
      # This is an exchange node, so the sender is a fragment root, and should be printed
      # next.
      self.__build_summary_table(summary, sender_idx, True, indent_level, False, output)
    except (KeyError, TypeError):
      # Fall through if idx not in map, or if exch_to_sender_map itself is not set
      pass

    idx += 1
    if node.num_children > 0:
      # The 0th child keeps this node's indent level; its rows are buffered and
      # appended after the indented siblings so it is printed last.
      first_child_output = []
      idx = \
        self.__build_summary_table(
            summary, idx, False, indent_level, False, first_child_output)
      # 'range' (not Python-2-only 'xrange') keeps this Py2/Py3 compatible.
      for child_idx in range(1, node.num_children):
        # All other children are indented (we only have 0, 1 or 2 children for every exec
        # node at the moment)
        idx = self.__build_summary_table(
            summary, idx, False, indent_level + 1, True, output)
      output += first_child_output
    return idx