def scan_for_counters_in_files(log_file_uris, fs, hadoop_version):
    """Gather Hadoop counters from the job logs found in *log_file_uris*.

    URIs that ``_JOB_LOG_PATH_RE`` does not match are ignored. The remaining
    logs are read with ``fs.cat()`` in ascending order of the ``step_num``
    captured from their path (ties fall back to comparing the URIs), and
    every line is passed to ``parse_hadoop_counters_from_line()`` together
    with *hadoop_version*.

    Returns a dict mapping the step number reported by the line parser to
    the counters parsed for it. If more than one line produces counters for
    the same step, the line read last wins.
    """
    matched = ((uri, _JOB_LOG_PATH_RE.match(uri)) for uri in log_file_uris)
    # (step number, URI) pairs, so a plain tuple sort orders logs by step
    logs_by_step = sorted(
        (int(m.group("step_num")), uri) for uri, m in matched if m)

    counters_by_step = {}
    for _, uri in logs_by_step:
        # a falsy result from fs.cat() means there is nothing to scan
        for line in fs.cat(uri) or ():
            found, step_num = parse_hadoop_counters_from_line(
                line, hadoop_version)
            if found:
                counters_by_step[step_num] = found
    return counters_by_step
def scan_for_counters_in_files(log_file_uris, fs, hadoop_version):
    """Read counters out of whichever *log_file_uris* are job logs.

    Only URIs matched by ``_JOB_LOG_PATH_RE`` are considered. They are
    visited in increasing order of the integer ``step_num`` taken from the
    path, their contents are fetched through ``fs.cat()``, and each line is
    given to ``parse_hadoop_counters_from_line()`` along with
    *hadoop_version*.

    The result is a dict keyed by the step number the line parser returns;
    a later line with counters for a step replaces an earlier one.
    """
    # Pair each recognised job log with its numeric step so that sorting the
    # tuples puts the logs in step order.
    numbered_uris = []
    for candidate in log_file_uris:
        path_match = _JOB_LOG_PATH_RE.match(candidate)
        if path_match:
            step_key = int(path_match.group('step_num'))
            numbered_uris.append((step_key, candidate))
    numbered_uris.sort()

    result = {}
    for entry in numbered_uris:
        contents = fs.cat(entry[1])
        if contents:
            for text in contents:
                parsed = parse_hadoop_counters_from_line(text, hadoop_version)
                line_counters, line_step = parsed
                # lines without counters leave the result untouched
                if line_counters:
                    result[line_step] = line_counters
    return result
def test_job_log_path_re_on_3_x_ami(self):
    # Sample history-file URI ending in .jhist (a 3.x AMI layout, going by
    # the test name); its file name starts with job_1441057410014_0011.
    history_uri = 'ssh://ec2-52-24-131-73.us-west-2.compute.amazonaws.com/mnt/var/log/hadoop/history/2015/08/31/000000/job_1441057410014_0011-1441057493406-hadoop-streamjob6928722756977481487.jar-1441057604210-2-1-SUCCEEDED-default-1441057523674.jhist'  # noqa

    match = _JOB_LOG_PATH_RE.match(history_uri)
    self.assertTrue(match)

    # Groups are compared as strings, so step_num keeps its leading zeros.
    expected_groups = (
        ('timestamp', '1441057410014'),
        ('step_num', '0011'),
        ('user', 'hadoop'),
    )
    for group_name, expected in expected_groups:
        self.assertEqual(match.group(group_name), expected)
def test_job_log_path_re_on_2_x_ami(self):
    # Sample history-file URI under history/done/version-1/ (a 2.x AMI
    # layout, going by the test name); the file name is underscore-separated
    # and starts with job_201508312315_0011.
    log_uri = 'ssh://ec2-52-88-7-250.us-west-2.compute.amazonaws.com/mnt/var/log/hadoop/history/done/version-1/ip-172-31-29-201.us-west-2.compute.internal_1441062912502_/2015/08/31/000000/job_201508312315_0011_1441062985499_hadoop_streamjob1474198573915234945.jar'  # noqa

    found = _JOB_LOG_PATH_RE.match(log_uri)
    self.assertTrue(found)

    # Check each named group, in this order, against the value embedded in
    # the URI above.
    group_names = ('timestamp', 'step_num', 'user')
    wanted_values = ('201508312315', '0011', 'hadoop')
    for name, wanted in zip(group_names, wanted_values):
        self.assertEqual(found.group(name), wanted)