Ejemplo n.º 1
0
def summary(profile):
    """Build the top-level summary metrics for a query profile.

    Collects PlanningTime and RemoteFragmentsStarted from the 'Summary'
    node, TotalTime from the 'Execution Profile' node, and the highest
    per-host PeakMemoryUsage, returning them as a list of
    ``{'key', 'value', 'unit'}`` dicts.

    :param profile: profile tree exposing ``find_by_name``.
    :return: list of metric dicts.
    """
    # Renamed local so it does not shadow this function's own name.
    summary_node = profile.find_by_name('Summary')
    execution_profile = profile.find_by_name('Execution Profile')
    counter_map = summary_node.counter_map()
    counter_map_execution_profile = execution_profile.counter_map()
    # Hosts with their peak memory usage, sorted highest first.
    host_list = models.host_by_metric(profile, 'PeakMemoryUsage', exprs=[max])
    host_list = sorted(host_list, key=lambda x: x[1], reverse=True)
    # The PeakMemoryUsage value is not always present; default to 0.
    peak_memory = models.TCounter(
        value=host_list[0][1], unit=3) if host_list else models.TCounter(
            value=0, unit=3)
    return [{
        'key': 'PlanningTime',
        'value': counter_map['PlanningTime'].value,
        'unit': counter_map['PlanningTime'].unit
    }, {
        'key': 'RemoteFragmentsStarted',
        'value': counter_map['RemoteFragmentsStarted'].value,
        'unit': counter_map['RemoteFragmentsStarted'].unit
    }, {
        'key': 'TotalTime',
        'value': counter_map_execution_profile['TotalTime'].value,
        'unit': counter_map_execution_profile['TotalTime'].unit
    }, {
        'key': 'PeakMemoryUsage',
        'value': peak_memory.value,
        'unit': peak_memory.unit
    }]
Ejemplo n.º 2
0
        def add_host(node, exec_summary_json=exec_summary_json):
            """Attach Hosts/Broadcast and LocalTime/ChildTime virtual
            counters to *node* from the exec summary and its counter map."""
            is_plan_node = node.is_plan_node()
            node_id = node.id()

            # Hosts & Broadcast, only for regular nodes that appear in the
            # exec summary table.
            if node_id and node.is_regular() and int(node_id) in exec_summary_json:
                summary_entry = exec_summary_json[int(node_id)]
                node.val.counters.append(
                    models.TCounter(name='Hosts',
                                    value=summary_entry["hosts"],
                                    unit=0))
                node.val.counters.append(
                    models.TCounter(name='Broadcast',
                                    value=1 if summary_entry["broadcast"] else 0,
                                    unit=0))

            # LocalTime & ChildTime virtual counters.
            if node_id:
                # Total time spent in direct plan-node children.
                total_child_time = sum(
                    c.counter_map()['TotalTime'].value
                    for c in node.children if c.is_plan_node())

                counter_map = node.counter_map()
                total_time = counter_map['TotalTime'].value

                # Default: local time is whatever is not spent in children.
                local_time = total_time - total_child_time

                # Exchange nodes: subtract inactive and async (wait) time
                # instead.
                if is_plan_node and re.search(r'EXCHANGE_NODE',
                                              node.val.name) is not None:
                    async_time = counter_map.get(
                        'AsyncTotalTime', models.TCounter(value=0)).value
                    local_time = (total_time
                                  - counter_map['InactiveTotalTime'].value
                                  - async_time)

                # Hash joins: prefer the explicit LocalTime metric when
                # present, else probe + build time.
                if is_plan_node and re.search(r'HASH_JOIN_NODE',
                                              node.val.name) is not None:
                    if "LocalTime" in counter_map:
                        local_time = counter_map["LocalTime"].value
                    else:
                        local_time = (counter_map["ProbeTime"].value
                                      + counter_map["BuildTime"].value)

                # Virtual metrics consumed by the UI (unit 5 = time).
                node.val.counters.append(
                    models.TCounter(name='LocalTime', value=local_time,
                                    unit=5))
                node.val.counters.append(
                    models.TCounter(name='ChildTime', value=total_child_time,
                                    unit=5))
Ejemplo n.º 3
0
        def add_host(node, exec_summary_json=exec_summary_json):
            """Decorate *node* with Hosts/Broadcast counters, scan-table
            info, and LocalTime/ChildTime virtual counters."""
            is_plan_node = node.is_plan_node()
            node_id = node.id()

            # Hosts & Broadcast for regular nodes present in the exec summary.
            if node_id and node.is_regular() and int(node_id) in exec_summary_json:
                nid = int(node_id)
                exec_summary_node = exec_summary_json.get(nid, {})
                node.val.counters.append(models.TCounter(
                    name='Hosts', value=exec_summary_node.get('hosts', ''), unit=0))
                broadcast = 1 if exec_summary_json[nid]["broadcast"] else 0
                node.val.counters.append(models.TCounter(
                    name='Broadcast', value=broadcast, unit=0))

                # Scan nodes: record the scanned table and whether its stats
                # were flagged (missing_stats comes from the enclosing scope).
                if exec_summary_node.get('detail') and re.search(
                        r'\w*_SCAN_NODE', node.name(), re.IGNORECASE):
                    table_name = exec_summary_node['detail'].split()[0]
                    node.val.info_strings['Table'] = table_name
                    node.val.counters.append(models.TCounter(
                        name='MissingStats',
                        value=missing_stats.get(table_name, 0),
                        unit=0))

            # LocalTime & ChildTime virtual counters.
            if node_id:
                # Total time spent in direct plan-node children.
                child_time = 0
                for child in node.children:
                    if child.is_plan_node():
                        child_time += child.counter_map()['TotalTime'].value

                counter_map = node.counter_map()

                # Default: local time is whatever is not spent in children.
                local_time = counter_map['TotalTime'].value - child_time

                # Exchange nodes: subtract inactive, async and data-wait time.
                if is_plan_node and re.search(r'EXCHANGE_NODE', node.val.name) is not None:
                    async_time = counter_map.get(
                        'AsyncTotalTime', models.TCounter(value=0)).value
                    dequeue = node.find_by_name('Dequeue')
                    if dequeue:
                        data_wait_time = dequeue.counter_map().get(
                            'DataWaitTime', models.TCounter(value=0)).value
                    else:
                        data_wait_time = 0
                    local_time = (counter_map['TotalTime'].value
                                  - counter_map['InactiveTotalTime'].value
                                  - async_time
                                  - data_wait_time)

                # Hash joins: prefer the explicit LocalTime metric when
                # present, else probe + build time.
                if is_plan_node and re.search(r'HASH_JOIN_NODE', node.val.name) is not None:
                    if "LocalTime" in counter_map:
                        local_time = counter_map["LocalTime"].value
                    else:
                        local_time = (counter_map["ProbeTime"].value
                                      + counter_map["BuildTime"].value)

                # Virtual metrics consumed by the UI (unit 5 = time).
                node.val.counters.append(models.TCounter(name='LocalTime', value=local_time, unit=5))
                node.val.counters.append(models.TCounter(name='ChildTime', value=child_time, unit=5))
Ejemplo n.º 4
0
    def pre_process(self, profile):
        """Augment *profile* with summary counters and per-node virtual metrics.

        Parses the ExecSummary table, converts the Summary event sequences
        into named ``TCounter`` metrics, records tables flagged with
        missing/corrupt stats, then walks every node attaching Hosts,
        Broadcast, MissingStats, LocalTime and ChildTime counters.

        :param profile: profile tree exposing ``find_by_name`` and
            ``foreach_lambda``.
        """
        summary = profile.find_by_name("Summary")
        # ExecSummary is not always present in the info strings; fall back
        # to an empty dict so add_host below degrades gracefully.
        exec_summary_json = utils.parse_exec_summary(
            summary.val.info_strings.get('ExecSummary')
        ) if summary.val.info_strings.get('ExecSummary') else {}
        # Maps event-sequence labels (exact strings, or regexes in the
        # fallback path) to the counter name emitted for that event.
        stats_mapping = {
            'Query Compilation': {
                'Metadata load finished': 'MetadataLoadTime',
                'Analysis finished': 'AnalysisTime',
                'Single node plan created': 'SinglePlanTime',
                'Runtime filters computed': 'RuntimeFilterTime',
                'Distributed plan created': 'DistributedPlanTime',
                'Lineage info computed': 'LineageTime'
            },
            'Query Timeline': {
                'Planning finished':
                'PlanningTime',
                'Completed admission':
                'AdmittedTime',
                'Rows available':
                'QueryTime',
                'Unregister query':
                'EndTime',
                '((fragment instances)|(remote fragments)|(execution backends).*) started':
                'RemoteFragmentsStarted'
            }
        }
        # Setup Event Sequence
        # NOTE(review): summary.val is already dereferenced above, so this
        # check cannot actually guard a missing Summary node — confirm.
        if summary:
            for s in summary.val.event_sequences:
                sequence_name = s.name
                sequence = stats_mapping.get(sequence_name)
                if sequence:
                    duration = 0
                    for i in range(len(s.labels)):
                        event_name = s.labels[i]
                        # Timestamps are cumulative; the delta from the
                        # previous event is this event's duration.
                        event_duration = s.timestamps[i] - duration

                        if sequence.get(event_name):
                            summary.val.counters.append(
                                models.TCounter(name=sequence.get(event_name),
                                                value=event_duration,
                                                unit=5))
                            # Matched events are removed so the zero-fill
                            # loops below only cover events that never fired.
                            sequence.pop(event_name)
                        else:
                            # No exact match: try the mapping keys as regexes
                            # (Python 2 iteritems; the immediate break keeps
                            # the pop-during-iteration safe).
                            for key, value in sequence.iteritems():
                                if re.search(key, event_name, re.IGNORECASE):
                                    summary.val.counters.append(
                                        models.TCounter(name=value,
                                                        value=event_duration,
                                                        unit=5))
                                    sequence.pop(key)
                                    break

                        duration = s.timestamps[i]

            # Zero-fill counters for events that never occurred, so every
            # expected metric name is always present on the summary.
            for key, value in stats_mapping.get(
                    'Query Compilation').iteritems():
                summary.val.counters.append(
                    models.TCounter(name=value, value=0, unit=5))
            for key, value in stats_mapping.get('Query Timeline').iteritems():
                summary.val.counters.append(
                    models.TCounter(name=value, value=0, unit=5))

            # Tables flagged as having missing or corrupt stats; consumed by
            # add_host below when decorating scan nodes.
            missing_stats = {}
            for key in [
                    'Tables Missing Stats', 'Tables With Corrupt Table Stats'
            ]:
                if summary.val.info_strings.get(key):
                    tables = summary.val.info_strings.get(key).split(',')
                    for table in tables:
                        missing_stats[table] = 1

        def add_host(node, exec_summary_json=exec_summary_json):
            """Attach Hosts/Broadcast, scan-table info and LocalTime/ChildTime
            virtual counters to a single profile node."""
            is_plan_node = node.is_plan_node()
            node_id = node.id()
            # -1 is a sentinel that can never match an exec summary key.
            nid = int(node_id) if node_id and node.is_regular() else -1
            # Setup Hosts & Broadcast
            if node_id and node.is_regular() and nid in exec_summary_json:
                exec_summary_node = exec_summary_json.get(nid, {})
                node.val.counters.append(
                    models.TCounter(name='Hosts',
                                    value=exec_summary_node.get('hosts', ''),
                                    unit=0))
                broadcast = 0
                if exec_summary_json[nid]['broadcast']:
                    broadcast = 1
                node.val.counters.append(
                    models.TCounter(name='Broadcast', value=broadcast, unit=0))

                # Scan nodes: first token of 'detail' is the table name.
                if exec_summary_node.get('detail') and re.search(
                        r'\w*_SCAN_NODE', node.name(), re.IGNORECASE):
                    details = exec_summary_node['detail'].split()
                    node.val.info_strings['Table'] = details[0]
                    node.val.counters.append(
                        models.TCounter(name='MissingStats',
                                        value=missing_stats.get(details[0], 0),
                                        unit=0))

            # Setup LocalTime & ChildTime
            if node_id:
                # Total time spent in direct plan-node children.
                child_time = 0
                for c in node.children:
                    if c.is_plan_node():
                        child_time += c.counter_map()['TotalTime'].value

                counter_map = node.counter_map()

                # Default split: local time is whatever is not in children.
                local_time = counter_map['TotalTime'].value - child_time

                # Exchange node: subtract inactive/async wait time; fall back
                # to the Dequeue child's DataWaitTime when InactiveTotalTime
                # is zero.
                if is_plan_node and re.search(r'EXCHANGE_NODE',
                                              node.val.name) is not None:
                    async_time = counter_map.get(
                        'AsyncTotalTime', models.TCounter(value=0)).value
                    inactive_time = counter_map['InactiveTotalTime'].value
                    if inactive_time == 0:
                        dequeue = node.find_by_name('Dequeue')
                        inactive_time = dequeue.counter_map().get(
                            'DataWaitTime', models.TCounter(
                                value=0)).value if dequeue else 0
                    local_time = counter_map[
                        'TotalTime'].value - inactive_time - async_time
                    child_time = counter_map['TotalTime'].value - local_time
                # Sender: only the serialization part counts as local.
                if re.search(
                        r'KrpcDataStreamSender',
                        node.val.name) is not None and node.fragment_instance:
                    local_time = counter_map.get(
                        'SerializeBatchTime', models.TCounter(value=0)).value
                    child_time = counter_map['TotalTime'].value - local_time
                # HBase scan: raw read time is attributed to children.
                if re.search(r'HBASE_SCAN_NODE', node.val.name):
                    local_time = counter_map[
                        'TotalTime'].value - counter_map.get(
                            'TotalRawHBaseReadTime(*)',
                            models.TCounter(value=0)).value
                    child_time = counter_map['TotalTime'].value - local_time
                # Kudu/HDFS scans: client/read time is extra, so TotalTime is
                # rewritten to equal local_time + child_time.
                if re.search(r'KUDU_SCAN_NODE', node.val.name):
                    child_time = counter_map.get(
                        'KuduClientTime', models.TCounter(value=0)).value
                    local_time = counter_map['TotalTime'].value
                    counter_map['TotalTime'].value = child_time + local_time
                if re.search(r'HDFS_SCAN_NODE', node.val.name):
                    child_time = counter_map.get(
                        'TotalRawHdfsReadTime(*)',
                        models.TCounter(value=0)).value
                    local_time = counter_map['TotalTime'].value
                    counter_map['TotalTime'].value = local_time + child_time

                # Hash join: prefer the explicit LocalTime metric when
                # present, else probe + build time.
                if is_plan_node and re.search(r'HASH_JOIN_NODE',
                                              node.val.name) is not None:
                    if ("LocalTime" in counter_map):
                        local_time = counter_map["LocalTime"].value
                    else:
                        local_time = counter_map["ProbeTime"].value +\
                            counter_map["BuildTime"].value

                # Add two virtual metrics for local_time and child_time
                node.val.counters.append(
                    models.TCounter(name='LocalTime', value=local_time,
                                    unit=5))
                node.val.counters.append(
                    models.TCounter(name='ChildTime', value=child_time,
                                    unit=5))

        # Decorate every node in the profile tree.
        profile.foreach_lambda(add_host)
Ejemplo n.º 5
0
        def add_host(node, exec_summary_json=exec_summary_json):
            """Attach virtual counters to *node*.

            Adds Hosts/Broadcast (plus Table/MissingStats for scan nodes)
            from the exec summary, computes LocalTime/ChildTime per node
            type, and emits a SpillTime counter when the node spilled.
            """
            is_plan_node = node.is_plan_node()
            node_id = node.id()
            # -1 is a sentinel that can never match an exec summary key.
            nid = int(node_id) if node_id and node.is_regular() else -1
            # Setup Hosts & Broadcast
            if node_id and node.is_regular() and nid in exec_summary_json:
                exec_summary_node = exec_summary_json.get(nid, {})
                node.val.counters.append(
                    models.TCounter(name='Hosts',
                                    value=exec_summary_node.get('hosts', ''),
                                    unit=0))
                broadcast = 0
                if exec_summary_json[nid]['broadcast']:
                    broadcast = 1
                node.val.counters.append(
                    models.TCounter(name='Broadcast', value=broadcast, unit=0))

                # Scan nodes: first token of 'detail' is the table name;
                # missing_stats comes from the enclosing scope.
                if exec_summary_node.get('detail') and re.search(
                        r'\w*_SCAN_NODE', node.name(), re.IGNORECASE):
                    details = exec_summary_node['detail'].split()
                    node.val.info_strings['Table'] = details[0]
                    node.val.counters.append(
                        models.TCounter(name='MissingStats',
                                        value=missing_stats.get(details[0], 0),
                                        unit=0))

            # Setup LocalTime & ChildTime
            if node_id:
                # Total time spent in direct plan-node children.
                child_time = 0
                for c in node.children:
                    if c.is_plan_node():
                        child_time += c.counter_map()['TotalTime'].value

                counter_map = node.counter_map()

                # Default split; clamp at 0 since child totals can exceed
                # the parent's TotalTime.
                local_time = max(counter_map['TotalTime'].value - child_time,
                                 0)
                has_spilled = False

                node_name = node.name()
                # Exchange node: subtract the inactive/async wait time; fall
                # back to the Dequeue child's DataWaitTime when
                # InactiveTotalTime is zero.
                if is_plan_node and node_name == 'EXCHANGE_NODE':
                    async_time = counter_map.get(
                        'AsyncTotalTime', models.TCounter(value=0)).value
                    inactive_time = counter_map['InactiveTotalTime'].value
                    if inactive_time == 0:
                        dequeue = node.find_by_name('Dequeue')
                        inactive_time = dequeue.counter_map().get(
                            'DataWaitTime', models.TCounter(
                                value=0)).value if dequeue else 0
                    local_time = counter_map[
                        'TotalTime'].value - inactive_time - async_time
                    child_time = counter_map['TotalTime'].value - local_time
                # Sender: serialization counts as local unless inactive time
                # is recorded.
                elif node_name == 'KrpcDataStreamSender' and node.fragment_instance:
                    inactive_time = counter_map.get(
                        'InactiveTotalTime', models.TCounter(value=0)).value
                    if inactive_time == 0:
                        local_time = counter_map.get(
                            'SerializeBatchTime',
                            models.TCounter(value=0)).value
                    else:
                        local_time = counter_map[
                            'TotalTime'].value - inactive_time
                    child_time = counter_map['TotalTime'].value - local_time
                # HBase scan: raw read time is attributed to children.
                elif node_name == 'HBASE_SCAN_NODE':
                    local_time = counter_map[
                        'TotalTime'].value - counter_map.get(
                            'TotalRawHBaseReadTime(*)',
                            models.TCounter(value=0)).value
                    child_time = counter_map['TotalTime'].value - local_time
                # Kudu/HDFS scans: client/read time becomes child time.
                elif node_name == 'KUDU_SCAN_NODE':
                    child_time = counter_map.get(
                        'KuduClientTime', models.TCounter(value=0)).value
                    local_time = counter_map['TotalTime'].value
                elif node_name == 'HDFS_SCAN_NODE':
                    child_time = counter_map.get(
                        'TotalRawHdfsReadTime(*)',
                        models.TCounter(value=0)).value
                    local_time = counter_map['TotalTime'].value
                # Buffer pool: local time is the sum of its IO/alloc waits.
                elif node_name == 'Buffer pool':
                    local_time = counter_map.get(
                        'WriteIoWaitTime',
                        models.TCounter(value=0)).value + counter_map.get(
                            'ReadIoWaitTime',
                            models.TCounter(value=0)).value + counter_map.get(
                                'AllocTime', models.TCounter(value=0)).value
                elif node_name == 'AGGREGATION':
                    grouping_aggregator = node.find_by_name(
                        'GroupingAggregator')
                    if grouping_aggregator and grouping_aggregator.counter_map(
                    ).get('SpilledPartitions',
                          models.TCounter(value=0)).value > 0:
                        has_spilled = True
                elif is_plan_node and node_name == 'HASH_JOIN_NODE':  # For Hash Join, if the "LocalTime" metrics
                    hash_join_builder = node.find_by_name('Hash Join Builder')
                    if hash_join_builder and hash_join_builder.counter_map(
                    ).get('SpilledPartitions',
                          models.TCounter(value=0)).value > 0:
                        has_spilled = True
                    if ("LocalTime" in counter_map):
                        local_time = counter_map["LocalTime"].value
                    else:
                        local_time = counter_map["ProbeTime"].value +\
                            counter_map["BuildTime"].value
                # BUGFIX: counter_map.get() returns a TCounter object here,
                # not an int, so comparing it directly to 0 was meaningless —
                # compare its .value, matching the SpilledPartitions checks
                # above.
                if counter_map.get('SpilledPartitions',
                                   models.TCounter(value=0)).value > 0:
                    has_spilled = True

                # Add two virtual metrics for local_time and child_time
                if has_spilled:
                    spill_time = 0
                    buffer_pool = node.find_by_name('Buffer pool')
                    if buffer_pool:
                        # NOTE(review): assumes the 'Buffer pool' child was
                        # already visited and given a LocalTime counter —
                        # depends on traversal order; confirm.
                        spill_time = buffer_pool.counter_map(
                        )['LocalTime'].value
                    node.val.counters.append(
                        models.TCounter(name='SpillTime',
                                        value=spill_time,
                                        unit=5))
                node.val.counters.append(
                    models.TCounter(name='LocalTime', value=local_time,
                                    unit=5))
                node.val.counters.append(
                    models.TCounter(name='ChildTime', value=child_time,
                                    unit=5))
Ejemplo n.º 6
0
    def pre_process(self, profile):
        """Augment *profile* with summary counters and per-node virtual metrics.

        Parses the ExecSummary table, converts the Summary event sequences
        into named ``TCounter`` metrics, then walks every node attaching
        Hosts, Broadcast, LocalTime and ChildTime counters.

        :param profile: profile tree exposing ``find_by_name`` and
            ``foreach_lambda``.
        """
        summary = profile.find_by_name("Summary")
        # FIX: ExecSummary is not always present in the info strings; guard
        # with .get() and fall back to an empty dict (consistent with the
        # other pre_process variant) instead of raising KeyError.
        exec_summary_json = utils.parse_exec_summary(
            summary.val.info_strings.get('ExecSummary')
        ) if summary.val.info_strings.get('ExecSummary') else {}
        # Maps event-sequence labels (exact strings, or regexes in the
        # fallback path) to the counter name emitted for that event.
        stats_mapping = {
            'Query Compilation': {
                'Metadata load finished': 'MetadataLoadTime',
                'Analysis finished': 'AnalysisTime',
                'Single node plan created': 'SinglePlanTime',
                'Runtime filters computed': 'RuntimeFilterTime',
                'Distributed plan created': 'DistributedPlanTime',
                'Lineage info computed': 'LineageTime'
            },
            'Query Timeline': {
                'Planning finished':
                'PlanningTime',
                'Completed admission':
                'AdmittedTime',
                'Rows available':
                'QueryTime',
                'Unregister query':
                'EndTime',
                '((fragment instances)|(remote fragments)|(execution backends).*) started':
                'RemoteFragmentsStarted'
            }
        }
        # Setup Event Sequence
        if summary:
            for s in summary.val.event_sequences:
                sequence_name = s.name
                sequence = stats_mapping.get(sequence_name)
                if sequence:
                    duration = 0
                    for i in range(len(s.labels)):
                        event_name = s.labels[i]
                        # Timestamps are cumulative; the delta from the
                        # previous event is this event's duration.
                        # (Removed unused local ``event_value``.)
                        event_duration = s.timestamps[i] - duration

                        if sequence.get(event_name):
                            summary.val.counters.append(
                                models.TCounter(name=sequence.get(event_name),
                                                value=event_duration,
                                                unit=5))
                            # Matched events are removed so the zero-fill
                            # loops below only cover events that never fired.
                            sequence.pop(event_name)
                        else:
                            # No exact match: try the mapping keys as regexes
                            # (Python 2 iteritems; the immediate break keeps
                            # the pop-during-iteration safe).
                            for key, value in sequence.iteritems():
                                if re.search(key, event_name, re.IGNORECASE):
                                    summary.val.counters.append(
                                        models.TCounter(name=value,
                                                        value=event_duration,
                                                        unit=5))
                                    sequence.pop(key)
                                    break

                        duration = s.timestamps[i]

            # Zero-fill counters for events that never occurred, so every
            # expected metric name is always present on the summary.
            for key, value in stats_mapping.get(
                    'Query Compilation').iteritems():
                summary.val.counters.append(
                    models.TCounter(name=value, value=0, unit=5))
            for key, value in stats_mapping.get('Query Timeline').iteritems():
                summary.val.counters.append(
                    models.TCounter(name=value, value=0, unit=5))

        def add_host(node, exec_summary_json=exec_summary_json):
            """Attach Hosts/Broadcast and LocalTime/ChildTime virtual
            counters to a single profile node."""
            is_plan_node = node.is_plan_node()
            node_id = node.id()
            # Setup Hosts & Broadcast
            if node_id and node.is_regular() and int(
                    node_id) in exec_summary_json:

                node.val.counters.append(
                    models.TCounter(
                        name='Hosts',
                        value=exec_summary_json[int(node_id)]["hosts"],
                        unit=0))
                broadcast = 0
                if exec_summary_json[int(node_id)]["broadcast"]:
                    broadcast = 1
                node.val.counters.append(
                    models.TCounter(name='Broadcast', value=broadcast, unit=0))

            # Setup LocalTime & ChildTime
            if node_id:
                # Total time spent in direct plan-node children.
                child_time = 0
                for c in node.children:
                    if c.is_plan_node():
                        child_time += c.counter_map()['TotalTime'].value

                counter_map = node.counter_map()

                # Default split: local time is whatever is not in children.
                local_time = counter_map['TotalTime'].value - child_time

                # Exchange node: subtract the inactive/async wait time.
                if is_plan_node and re.search(r'EXCHANGE_NODE',
                                              node.val.name) is not None:
                    async_time = counter_map.get(
                        'AsyncTotalTime', models.TCounter(value=0)).value
                    local_time = counter_map['TotalTime'].value - counter_map[
                        'InactiveTotalTime'].value - async_time

                # Hash join: prefer the explicit LocalTime metric when
                # present, else probe + build time.
                if is_plan_node and re.search(r'HASH_JOIN_NODE',
                                              node.val.name) is not None:
                    if ("LocalTime" in counter_map):
                        local_time = counter_map["LocalTime"].value
                    else:
                        local_time = counter_map["ProbeTime"].value +\
                            counter_map["BuildTime"].value

                # Add two virtual metrics for local_time and child_time
                node.val.counters.append(
                    models.TCounter(name='LocalTime', value=local_time,
                                    unit=5))
                node.val.counters.append(
                    models.TCounter(name='ChildTime', value=child_time,
                                    unit=5))

        # Decorate every node in the profile tree.
        profile.foreach_lambda(add_host)