def parse_hadoop_counters_from_line(line, hadoop_version=None):
    """Extract Hadoop counter values from a single log line.

    Counter log lines come in more than one format. The 2.x-style format
    (the whole line is parsed as JSON) is tried first when *hadoop_version*
    is unspecified or selects it; otherwise the line is matched against
    ``_COUNTER_LINE_RE`` and its counter substring is decoded with the
    0.18- or 0.20-style parser.

    :param line: log line containing counter data
    :type line: str
    :param hadoop_version: Hadoop version string used to pick a parser;
                           if ``None``, each format is tried in turn
    :type hadoop_version: str or None

    :return: ``(counter_dict, step_num)`` or ``(None, None)``;
             ``counter_dict`` maps group -> counter -> summed int value
    """
    # The 2.x-style parser is used when no version was given, and (per
    # version_gte) for versions >= 2 or for versions >= 0.21 but not >= 1.
    use_json_parser = (
        hadoop_version is None
        or version_gte(hadoop_version, '2')
        or (version_gte(hadoop_version, '0.21')
            and not version_gte(hadoop_version, '1'))
    )
    if use_json_parser:
        parsed, step = _parse_counters_from_line_2_0(line)
        # With an explicit version this result is final, even when empty;
        # without one, only stop here if something was actually found.
        if parsed or hadoop_version:
            return parsed, step

    match = _COUNTER_LINE_RE.match(line)
    if not match:
        return None, None

    if hadoop_version is None:
        # Version unknown: try the 0.20 format, then fall back to 0.18.
        found, found_step = parse_hadoop_counters_from_line(line, '0.20')
        if found:
            return (found, found_step)
        return parse_hadoop_counters_from_line(line, '0.18')

    parse_triples = (_parse_counters_0_20
                     if uses_020_counters(hadoop_version)
                     else _parse_counters_0_18)

    # Sum values per (group, counter); the same counter may appear
    # more than once in the parsed substring.
    totals = {}
    for group, counter, value in parse_triples(match.group('counters')):
        by_counter = totals.setdefault(group, {})
        by_counter[counter] = by_counter.get(counter, 0) + int(value)

    return totals, int(match.group('step_num'))
def parse_hadoop_counters_from_line(line, hadoop_version=None):
    """Extract Hadoop counter values from a single log line.

    The counter log line format differs between Hadoop 0.18 and 0.20, so
    the counter substring captured by ``_COUNTER_LINE_RE`` is handed to
    whichever parser matches *hadoop_version*.

    :param line: log line containing counter data
    :type line: str
    :param hadoop_version: Hadoop version string used to pick a parser;
                           if ``None``, the 0.20 format is tried first
                           and the 0.18 format is used as a fallback
    :type hadoop_version: str or None

    :return: ``(counter_dict, step_num)`` or ``(None, None)`` if the line
             does not match; ``counter_dict`` maps
             group -> counter -> summed int value
    """
    match = _COUNTER_LINE_RE.match(line)
    if not match:
        return None, None

    if hadoop_version is None:
        # Version unknown: prefer a non-empty 0.20 result, else use 0.18.
        found, found_step = parse_hadoop_counters_from_line(line, '0.20')
        if found:
            return (found, found_step)
        return parse_hadoop_counters_from_line(line, '0.18')

    parse_triples = (_parse_counters_0_20
                     if uses_020_counters(hadoop_version)
                     else _parse_counters_0_18)

    # Sum values per (group, counter); the same counter may appear
    # more than once in the parsed substring.
    totals = {}
    for group, counter, value in parse_triples(match.group('counters')):
        by_counter = totals.setdefault(group, {})
        by_counter[counter] = by_counter.get(counter, 0) + int(value)

    return totals, int(match.group('step_num'))