Example #1
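This variant caches its work in the interpretation dict: it returns early if a usable 'task' entry already exists, pulls the application/job ID out of the 'step' interpretation, and warns and gives up if the ID required for the cluster's Hadoop generation (application ID on YARN, job ID pre-YARN) is missing.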
    def _interpret_task_logs(self, log_interpretation, partial=True):
        """Fetch task syslogs and stderr, and add 'task' to interpretation."""
        if 'task' in log_interpretation and (
                partial or not log_interpretation['task'].get('partial')):
            return   # already interpreted

        step_interpretation = log_interpretation.get('step') or {}

        application_id = step_interpretation.get('application_id')
        job_id = step_interpretation.get('job_id')
        output_dir = step_interpretation.get('output_dir')

        yarn = uses_yarn(self.get_hadoop_version())

        if yarn:
            if not application_id:
                log.warning("Can't fetch task logs; missing application ID")
                return
        else:
            if not job_id:
                log.warning("Can't fetch task logs; missing job ID")
                return

        log_interpretation['task'] = _interpret_task_logs(
            self.fs,
            self._ls_task_syslogs(
                application_id=application_id,
                job_id=job_id,
                output_dir=output_dir),
            partial=partial,
            stderr_callback=_log_parsing_task_stderr)
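A minimal sketch of the dict shape this method reads and writes; the keys are taken from the snippet above, but the values and the `runner` object are hypothetical placeholders, not part of the original code.

    # 'runner' stands in for an instance of the class defining the method above
    log_interpretation = {
        'step': {
            'application_id': 'application_1449857544442_0002',  # made-up ID
            'job_id': 'job_201512112247_0003',                    # made-up ID
            'output_dir': 'hdfs:///tmp/step-output',              # made up
        },
    }
    runner._interpret_task_logs(log_interpretation)
    # on success this adds log_interpretation['task']; if the required ID
    # is missing, it logs a warning and leaves the dict untouched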
Example #2
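Nearly identical to Example #1, except for the `no_job` guard: when the interpretation has been flagged as having no job attached, the method still returns early, but without logging the "missing ID" warning.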
    def _interpret_task_logs(self, log_interpretation, partial=True):
        """Fetch task syslogs and stderr, and add 'task' to interpretation."""
        if 'task' in log_interpretation and (
                partial or not log_interpretation['task'].get('partial')):
            return   # already interpreted

        step_interpretation = log_interpretation.get('step') or {}

        application_id = step_interpretation.get('application_id')
        job_id = step_interpretation.get('job_id')
        output_dir = step_interpretation.get('output_dir')

        yarn = uses_yarn(self.get_hadoop_version())

        if yarn:
            if not application_id:
                if not log_interpretation.get('no_job'):
                    log.warning(
                        "Can't fetch task logs; missing application ID")
                return
        else:
            if not job_id:
                if not log_interpretation.get('no_job'):
                    log.warning("Can't fetch task logs; missing job ID")
                return

        log_interpretation['task'] = _interpret_task_logs(
            self.fs,
            self._ls_task_syslogs(
                application_id=application_id,
                job_id=job_id,
                output_dir=output_dir),
            partial=partial,
            stderr_callback=_log_parsing_task_stderr)
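Under the same hypothetical setup as before, the `no_job` flag changes only the warning behavior:

    log_interpretation = {'no_job': True}  # flagged: no job was ever started
    runner._interpret_task_logs(log_interpretation)
    # returns quietly: no application/job ID available, but no warning logged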
Example #3
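An older, more self-contained variant. Instead of receiving the syslog listing from a helper, it builds the candidate `userlogs` directories itself (one per Hadoop log dir, keyed by application ID on YARN), wraps the listing in a logging generator, and returns the stored interpretation rather than None.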
    def _interpret_task_logs(self, log_interpretation):
        """Find and interpret the task logs, storing the
        interpretation in ``log_interpretation['task']``."""
        if 'task' not in log_interpretation:
            # get job/application ID from output of hadoop command
            step_interpretation = log_interpretation.get('step') or {}
            application_id = step_interpretation.get('application_id')
            job_id = step_interpretation.get('job_id')

            yarn = uses_yarn(self.get_hadoop_version())

            if yarn and application_id is None:
                log.warning("Can't fetch task logs without application ID")
                return {}
            elif not yarn and job_id is None:
                log.warning("Can't fetch task logs without job ID")
                return {}

            # Note: this is unlikely to be super-helpful on "real" (multi-node)
            # pre-YARN Hadoop because task logs aren't generally shipped to a
            # local directory. It's a start, anyways. See #1201.
            def stream_task_log_dirs():
                for log_dir in unique(
                        self._hadoop_log_dirs(
                            output_dir=step_interpretation.get('output_dir'))):

                    if yarn:
                        path = self.fs.join(log_dir, 'userlogs',
                                            application_id)
                    else:
                        # sometimes pre-YARN attempt logs are organized by
                        # job_id, sometimes not. Play it safe
                        path = self.fs.join(log_dir, 'userlogs')

                    if self.fs.exists(path):
                        log.info('Scanning task syslogs in %s' % path)
                        yield [path]

            # wrap _ls_task_syslogs() to add logging
            def ls_task_syslogs():
                # there should be at most one history log
                for match in _ls_task_syslogs(self.fs,
                                              stream_task_log_dirs(),
                                              application_id=application_id,
                                              job_id=job_id):

                    # TODO: this isn't really correct because
                    # _interpret_task_logs() sorts the log paths and
                    # scans starting at the most recent one. Probably
                    # should have _ls_task_syslogs() do the sorting.
                    log.info('  Scanning for errors: %s' % match['path'])
                    yield match

            log_interpretation['task'] = _interpret_task_logs(
                self.fs, ls_task_syslogs())

        return log_interpretation['task']
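A stripped-down, self-contained sketch of the batching convention `stream_task_log_dirs()` uses above: each yielded item is a list of directories, so a consumer can treat each batch as one place to search before moving on. The names here are illustrative, not from the snippet.

    import os

    def stream_existing_dirs(candidate_dirs, exists=os.path.isdir):
        # yield one single-element batch per directory that exists;
        # a consumer can stop pulling batches once it finds matches
        for d in candidate_dirs:
            if exists(d):
                yield [d]

    for batch in stream_existing_dirs(['/var/log/hadoop-yarn/userlogs']):
        print(batch)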
Example #4
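The same implementation as Example #3; the two differ only in line wrapping and indentation.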
    def _interpret_task_logs(self, log_interpretation):
        """Find and interpret the task logs, storing the
        interpretation in ``log_interpretation['task']``."""
        if 'task' not in log_interpretation:
            # get job/application ID from output of hadoop command
            step_interpretation = log_interpretation.get('step') or {}
            application_id = step_interpretation.get('application_id')
            job_id = step_interpretation.get('job_id')

            yarn = uses_yarn(self.get_hadoop_version())

            if yarn and application_id is None:
                log.warning("Can't fetch task logs without application ID")
                return {}
            elif not yarn and job_id is None:
                log.warning("Can't fetch task logs without job ID")
                return {}

            # Note: this is unlikely to be super-helpful on "real" (multi-node)
            # pre-YARN Hadoop because task logs aren't generally shipped to a
            # local directory. It's a start, anyways. See #1201.
            def stream_task_log_dirs():
                for log_dir in unique(
                    self._hadoop_log_dirs(
                        output_dir=step_interpretation.get('output_dir'))):

                    if yarn:
                        path = self.fs.join(
                            log_dir, 'userlogs', application_id)
                    else:
                        # sometimes pre-YARN attempt logs are organized by
                        # job_id, sometimes not. Play it safe
                        path = self.fs.join(log_dir, 'userlogs')

                    if self.fs.exists(path):
                        log.info('Scanning task syslogs in %s' % path)
                        yield [path]

            # wrap _ls_task_syslogs() to add logging
            def ls_task_syslogs():
                # there should be at most one history log
                for match in _ls_task_syslogs(
                        self.fs, stream_task_log_dirs(),
                        application_id=application_id, job_id=job_id):

                    # TODO: this isn't really correct because
                    # _interpret_task_logs() sorts the log paths and
                    # scans starting at the most recent one. Probably
                    # should have _ls_task_syslogs() do the sorting.
                    log.info('  Scanning for errors: %s' % match['path'])
                    yield match

            log_interpretation['task'] = _interpret_task_logs(
                self.fs, ls_task_syslogs())

        return log_interpretation['task']
Example #5
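A test helper: it simply forwards to the module-level `_interpret_task_logs()` function, supplying the test case's mock filesystem, mocked path matches, and a log callback.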
    def interpret_task_logs(self, **kwargs):
        return _interpret_task_logs(self.mock_fs,
                                    self.mock_path_matches(),
                                    log_callback=self.mock_log_callback,
                                    **kwargs)
Example #6
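The same helper minus the `log_callback`, for tests that don't need to observe the scanning output.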
    def interpret_task_logs(self, **kwargs):
        return _interpret_task_logs(
            self.mock_fs, self.mock_path_matches(), **kwargs)
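A hypothetical use of either helper inside a test method; the assertion is an assumption about the return type, not something shown in the snippets.

    def test_interpret_task_logs(self):
        result = self.interpret_task_logs(partial=False)
        # the helper returns whatever interpretation _interpret_task_logs()
        # built from the mocked syslog matches
        self.assertIsInstance(result, dict)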