def test_filter_and_sort(self):
    """Non-syslog paths are filtered out, and results come back sorted
    newest-first by job/attempt ID; job_id= restricts to one job."""
    # on EMR, looks like attempts are grouped in subdirectories
    self.mock_paths = [
        '/userlogs/attempt_201512232143_0008_m_000001_3/syslog',
        '/userlogs/attempt_201512232143_0008_r_000000_0/syslog',
        '/userlogs/attempt_201512232143_0008_m_000003_1/syslog',
        '/userlogs/attempt_201512232143_0006_m_000000_0/syslog',
        '/userlogs/attempt_201512232143_0006_m_000000_0/stderr',
        '/userlogs/random-crud',
    ]

    # should be sorted in reverse order by app and container ID
    expected_order = [
        '/userlogs/attempt_201512232143_0008_r_000000_0/syslog',
        '/userlogs/attempt_201512232143_0008_m_000001_3/syslog',
        '/userlogs/attempt_201512232143_0008_m_000003_1/syslog',
        '/userlogs/attempt_201512232143_0006_m_000000_0/syslog',
    ]
    self.assertEqual(
        _ls_pre_yarn_task_syslogs(self.mock_fs, ['/userlogs']),
        expected_order)

    # test filter by job ID
    self.assertEqual(
        _ls_pre_yarn_task_syslogs(
            self.mock_fs, ['/userlogs'], job_id='job_201512232143_0006'),
        ['/userlogs/attempt_201512232143_0006_m_000000_0/syslog'])
def test_filter_and_sort(self):
    """Non-syslog paths are filtered out, and results come back sorted
    newest-first by job/attempt ID; job_id= restricts to one job.

    NOTE(review): this duplicates an earlier ``test_filter_and_sort``
    definition in the same class; Python keeps only the last one, so the
    earlier copy never runs. Consider deleting or renaming one of them.
    """
    # on EMR, looks like attempts are grouped in subdirectories
    self.mock_paths = [
        '/userlogs/attempt_201512232143_0008_m_000001_3/syslog',
        '/userlogs/attempt_201512232143_0008_r_000000_0/syslog',
        '/userlogs/attempt_201512232143_0008_m_000003_1/syslog',
        '/userlogs/attempt_201512232143_0006_m_000000_0/syslog',
        '/userlogs/attempt_201512232143_0006_m_000000_0/stderr',
        '/userlogs/random-crud',
    ]

    # should be sorted in reverse order by app and container ID
    self.assertEqual(
        _ls_pre_yarn_task_syslogs(self.mock_fs, ['/userlogs']),
        [
            '/userlogs/attempt_201512232143_0008_r_000000_0/syslog',
            '/userlogs/attempt_201512232143_0008_m_000001_3/syslog',
            '/userlogs/attempt_201512232143_0008_m_000003_1/syslog',
            '/userlogs/attempt_201512232143_0006_m_000000_0/syslog',
        ])

    # test filter by job ID
    only_0006 = _ls_pre_yarn_task_syslogs(
        self.mock_fs, ['/userlogs'], job_id='job_201512232143_0006')
    self.assertEqual(
        only_0006,
        ['/userlogs/attempt_201512232143_0006_m_000000_0/syslog'])
def test_read_logs_from_multiple_dirs(self):
    """Syslogs from several log dirs (even on different hosts) are merged
    into a single sorted listing; unrelated paths are ignored."""
    self.mock_paths = [
        'ssh://node1/logs/attempt_201512232143_0008_m_000000_0/syslog',
        'ssh://node2/logs/attempt_201512232143_0008_r_000000_0/syslog',
        'ssh://node1/etc/sys-stuff',
    ]

    result = _ls_pre_yarn_task_syslogs(
        self.mock_fs, ['ssh://node1/logs', 'ssh://node2/logs'])

    # reducer attempt sorts ahead of mapper attempt
    self.assertEqual(
        result,
        ['ssh://node2/logs/attempt_201512232143_0008_r_000000_0/syslog',
         'ssh://node1/logs/attempt_201512232143_0008_m_000000_0/syslog'])
def _find_error_in_task_logs(fs, log_dirs_stream, hadoop_version,
                             application_id=None, job_id=None):
    """Given a filesystem and a stream of lists of log dirs to search in,
    find the last error and return details about it. *hadoop_version*
    is required, as task logs have very different paths in YARN.

    In YARN, you must set *application_id*, and pre-YARN, you must set
    *job_id*, or we'll bail out and return None.

    Returns a dictionary with the following keys ("optional" means
    that something may be None):

    syslog: dict with keys:
        path: path of syslog we found error in
        error: error details; dict with keys:
            exception: Java exception (as string)
            stack_trace: array of lines with Java stack trace
        split: optional input split we were reading; dict with keys:
            path: path of input file
            start_line: first line of split (0-indexed)
            num_lines: number of lines in split
    stderr: optional dict with keys:
        path: path of stderr corresponding to syslog
        error: optional error details; dict with keys:
            exception: string  (Python exception)
            traceback: array of lines with Python stack trace
    type: always set to 'task'
    """
    yarn = uses_yarn(hadoop_version)

    # without the right ID for this Hadoop flavor, we can't filter
    # syslogs, so don't even try
    required_id = application_id if yarn else job_id
    if required_id is None:
        return None

    # we assume that each set of log paths contains the same copies
    # of syslogs, so stop once we find any non-empty set of log dirs
    syslog_paths = []
    for log_dirs in log_dirs_stream:
        if yarn:
            syslog_paths = _ls_yarn_task_syslogs(
                fs, log_dirs, application_id=application_id)
        else:
            syslog_paths = _ls_pre_yarn_task_syslogs(
                fs, log_dirs, job_id=job_id)

        if syslog_paths:
            break

    for syslog_path in syslog_paths:
        log.debug('Looking for error in %s' % syslog_path)

        syslog_info = _parse_task_syslog(_cat_log(fs, syslog_path))
        if syslog_info['error']:
            # found error! see if we can explain it
            # TODO: don't bother if error wasn't due to child process
            stderr_path = _stderr_for_syslog(syslog_path)
            stderr_info = _parse_python_task_stderr(
                _cat_log(fs, stderr_path))

            # output error info
            syslog_info['path'] = syslog_path
            stderr_info['path'] = stderr_path

            return {'type': 'task',
                    'syslog': syslog_info,
                    'stderr': stderr_info}

    # no syslog contained an error
    return None
def test_no_log_dirs(self):
    """With no log dirs to search, the listing is simply empty."""
    result = _ls_pre_yarn_task_syslogs(self.mock_fs, [])
    self.assertEqual(result, [])