Example #1
0
def scan_for_counters_in_files(log_file_uris, fs, hadoop_version):
    """Collect Hadoop counters from the job logs among *log_file_uris*.

    URIs that do not match ``_JOB_LOG_PATH_RE`` are ignored. The remaining
    logs are read through ``fs.cat()`` in ascending order of the
    ``step_num`` group captured from their path (ties broken by URI), and
    each line is handed to ``parse_hadoop_counters_from_line()`` along with
    *hadoop_version*.

    Returns a dict mapping the step number reported by the line parser to
    the counters found for it. When several lines report the same step
    number, the last one read wins.
    """
    # Pair every URI with its regex match so filtering and key extraction
    # can share a single pass.
    matched = ((_JOB_LOG_PATH_RE.match(uri), uri) for uri in log_file_uris)
    ordered = sorted(
        (int(m.group("step_num")), uri) for m, uri in matched if m)

    counters = {}
    for _, uri in ordered:
        lines = fs.cat(uri)
        if lines:  # skip logs for which cat() hands back nothing usable
            for line in lines:
                parsed, step_num = parse_hadoop_counters_from_line(
                    line, hadoop_version)
                if parsed:
                    counters[step_num] = parsed
    return counters
Example #2
0
def scan_for_counters_in_files(log_file_uris, fs, hadoop_version):
    """Scan job logs for counters, using *fs* for file system access.

    Only URIs matching ``_JOB_LOG_PATH_RE`` are considered. They are
    visited in ascending order of the integer ``step_num`` group from the
    path (then by URI), and every line of each log is passed to
    ``parse_hadoop_counters_from_line()`` together with *hadoop_version*.

    Returns a dict keyed by the step number the parser returns for a line;
    a later line with the same step number replaces the earlier entry.
    """
    # (step number, URI) pairs; the step number doubles as the sort key
    keyed_uris = []
    for uri in log_file_uris:
        path_match = _JOB_LOG_PATH_RE.match(uri)
        if path_match:
            step_key = int(path_match.group('step_num'))
            keyed_uris.append((step_key, uri))

    found = {}
    for _step_key, uri in sorted(keyed_uris):
        contents = fs.cat(uri)
        if not contents:
            # nothing to read from this log
            continue

        for line in contents:
            parsed = parse_hadoop_counters_from_line(line, hadoop_version)
            line_counters, step_num = parsed
            if line_counters:
                found[step_num] = line_counters
    return found
Example #3
0
    def test_job_log_path_re_on_3_x_ami(self):
        """_JOB_LOG_PATH_RE should match a sample ``.jhist`` history URI.

        The test name labels the sample as a 3.x AMI path. Besides matching,
        the regex must capture the expected ``timestamp``, ``step_num`` and
        ``user`` groups.
        """
        # Adjacent literals are concatenated into one URI string.
        uri = (
            'ssh://ec2-52-24-131-73.us-west-2.compute.amazonaws.com'
            '/mnt/var/log/hadoop/history/2015/08/31/000000/'
            'job_1441057410014_0011-1441057493406-hadoop-'
            'streamjob6928722756977481487.jar-1441057604210-2-1-SUCCEEDED-'
            'default-1441057523674.jhist'
        )
        expected_groups = (
            ('timestamp', '1441057410014'),
            ('step_num', '0011'),
            ('user', 'hadoop'),
        )

        m = _JOB_LOG_PATH_RE.match(uri)

        self.assertTrue(m)
        for group_name, expected in expected_groups:
            self.assertEqual(m.group(group_name), expected)
Example #4
0
    def test_job_log_path_re_on_2_x_ami(self):
        """_JOB_LOG_PATH_RE should match a sample ``.jar`` history URI.

        The test name labels the sample as a 2.x AMI path. Besides matching,
        the regex must capture the expected ``timestamp``, ``step_num`` and
        ``user`` groups.
        """
        # Adjacent literals are concatenated into one URI string.
        uri = (
            'ssh://ec2-52-88-7-250.us-west-2.compute.amazonaws.com'
            '/mnt/var/log/hadoop/history/done/version-1/'
            'ip-172-31-29-201.us-west-2.compute.internal_1441062912502_'
            '/2015/08/31/000000/'
            'job_201508312315_0011_1441062985499_hadoop_'
            'streamjob1474198573915234945.jar'
        )
        expected_groups = (
            ('timestamp', '201508312315'),
            ('step_num', '0011'),
            ('user', 'hadoop'),
        )

        m = _JOB_LOG_PATH_RE.match(uri)

        self.assertTrue(m)
        for group_name, expected in expected_groups:
            self.assertEqual(m.group(group_name), expected)
Example #5
0
    def test_job_log_path_re_on_3_x_ami(self):
        """Check the groups _JOB_LOG_PATH_RE captures from a ``.jhist`` URI.

        The sample is the one this test's name attributes to a 3.x AMI.
        """
        host = 'ssh://ec2-52-24-131-73.us-west-2.compute.amazonaws.com'
        directory = '/mnt/var/log/hadoop/history/2015/08/31/000000/'
        filename = (
            'job_1441057410014_0011-1441057493406-hadoop-'
            'streamjob6928722756977481487.jar-1441057604210-2-1-SUCCEEDED-'
            'default-1441057523674.jhist'
        )

        m = _JOB_LOG_PATH_RE.match(host + directory + filename)

        self.assertTrue(m)
        # group() with several names returns the captures as one tuple
        self.assertEqual(
            m.group('timestamp', 'step_num', 'user'),
            ('1441057410014', '0011', 'hadoop'))
Example #6
0
    def test_job_log_path_re_on_2_x_ami(self):
        """Check the groups _JOB_LOG_PATH_RE captures from a ``.jar`` URI.

        The sample is the one this test's name attributes to a 2.x AMI.
        """
        host = 'ssh://ec2-52-88-7-250.us-west-2.compute.amazonaws.com'
        directory = (
            '/mnt/var/log/hadoop/history/done/version-1/'
            'ip-172-31-29-201.us-west-2.compute.internal_1441062912502_'
            '/2015/08/31/000000/'
        )
        filename = (
            'job_201508312315_0011_1441062985499_hadoop_'
            'streamjob1474198573915234945.jar'
        )

        m = _JOB_LOG_PATH_RE.match(host + directory + filename)

        self.assertTrue(m)
        # group() with several names returns the captures as one tuple
        self.assertEqual(
            m.group('timestamp', 'step_num', 'user'),
            ('201508312315', '0011', 'hadoop'))