Code Example #1
File: test_error.py  Project: imtiaz39/mrjob
    def test_hadoop_error(self):
        self.assertEqual(
            _format_error(dict(hadoop_error=dict(message="DevastatingJavaException"))), "DevastatingJavaException"
        )

        self.assertEqual(
            _format_error(dict(hadoop_error=dict(message="DevastatingJavaException", path="history.jhist"))),
            "DevastatingJavaException\n\n(from history.jhist)",
        )

        self.assertEqual(
            _format_error(
                dict(
                    hadoop_error=dict(
                        message="DevastatingJavaException", path="history.jhist", start_line=23, num_lines=1
                    )
                )
            ),
            "DevastatingJavaException\n\n(from line 24 of history.jhist)",
        )

        self.assertEqual(
            _format_error(
                dict(
                    hadoop_error=dict(
                        message="DevastatingJavaException", path="history.jhist", start_line=23, num_lines=3
                    )
                )
            ),
            "DevastatingJavaException\n\n(from lines 24-26 of history.jhist)",
        )
Code Example #2
    def test_hadoop_error(self):
        self.assertEqual(
            _format_error(
                dict(hadoop_error=dict(message='DevastatingJavaException'))),
            'DevastatingJavaException')

        self.assertEqual(
            _format_error(
                dict(hadoop_error=dict(message='DevastatingJavaException',
                                       path='history.jhist'))),
            'DevastatingJavaException\n\n(from history.jhist)')

        self.assertEqual(
            _format_error(
                dict(hadoop_error=dict(message='DevastatingJavaException',
                                       path='history.jhist',
                                       start_line=23,
                                       num_lines=1))),
            'DevastatingJavaException\n\n(from line 24 of history.jhist)')

        self.assertEqual(
            _format_error(
                dict(hadoop_error=dict(message='DevastatingJavaException',
                                       path='history.jhist',
                                       start_line=23,
                                       num_lines=3))),
            'DevastatingJavaException\n\n(from lines 24-26 of history.jhist)')
Code Example #3
File: test_errors.py  Project: Dean838/mrjob
    def test_hadoop_error(self):
        self.assertEqual(
            _format_error(dict(hadoop_error=dict(
                message='DevastatingJavaException')
            )),
            'DevastatingJavaException')

        self.assertEqual(
            _format_error(dict(hadoop_error=dict(
                message='DevastatingJavaException',
                path='history.jhist'
            ))),
            'DevastatingJavaException\n\n(from history.jhist)')

        self.assertEqual(
            _format_error(dict(hadoop_error=dict(
                message='DevastatingJavaException',
                path='history.jhist',
                start_line=23,
                num_lines=1
            ))),
            'DevastatingJavaException\n\n(from line 24 of history.jhist)')

        self.assertEqual(
            _format_error(dict(hadoop_error=dict(
                message='DevastatingJavaException',
                path='history.jhist',
                start_line=23,
                num_lines=3
            ))),
            'DevastatingJavaException\n\n(from lines 24-26 of history.jhist)')
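The three variants above pin down how the location suffix is built: start_line is 0-indexed internally but rendered 1-based, and a single line reads "line N" while a range reads "lines M-N". Below is a minimal sketch of a helper with that behavior; the name _format_source and its signature are assumptions for illustration, not mrjob's actual code.

def _format_source(path, start_line=None, num_lines=None):
    """Render the '(from ...)' suffix the tests above expect.

    Hypothetical reconstruction: start_line is 0-indexed, and the
    displayed line numbers are 1-based and inclusive.
    """
    if start_line is None or num_lines is None:
        return '(from %s)' % path
    first = start_line + 1           # 0-indexed -> 1-based
    last = start_line + num_lines    # last displayed line, inclusive
    if num_lines == 1:
        return '(from line %d of %s)' % (first, path)
    else:
        return '(from lines %d-%d of %s)' % (first, last, path)

With start_line=23 this yields "line 24" when num_lines=1 and "lines 24-26" when num_lines=3, matching the assertions above.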
Code Example #4
    def test_split(self):
        self.assertEqual(
            _format_error(
                dict(split=dict(path='very_troubling.log',
                                start_line=665,
                                num_lines=334))),
            '\n\nwhile reading input from lines 666-999 of very_troubling.log')
Code Example #5
File: test_errors.py  Project: Dean838/mrjob
    def test_split(self):
        self.assertEqual(
            _format_error(dict(split=dict(
                path='very_troubling.log',
                start_line=665,
                num_lines=334))),
            '\n\nwhile reading input from lines 666-999 of very_troubling.log')
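The split tests are consistent with the same 1-based arithmetic as the hadoop_error tests: start_line=665 is displayed as 666, and the last displayed line is 665 + 334 = 999, hence "lines 666-999"; only the wrapping text changes, to "while reading input from ...".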
Code Example #6
File: local.py  Project: Alberne/mrjob
    def _log_cause_of_error(self, ex):
        if not isinstance(ex, _TaskFailedException):
            # if something went wrong inside mrjob, the stacktrace
            # will bubble up to the top level
            return

        # not using LogInterpretationMixin because it would be overkill

        input_path = self._task_input_path(
            ex.task_type, ex.step_num, ex.task_num)
        stderr_path = self._task_stderr_path(
            ex.task_type, ex.step_num, ex.task_num)

        if self.fs.exists(stderr_path):  # it should, but just to be safe
            # log-parsing code expects "str", not bytes; open in text mode
            with open(stderr_path) as stderr:
                task_error = _parse_task_stderr(stderr)
                if task_error:
                    task_error['path'] = stderr_path
                    log.error('Cause of failure:\n\n%s\n\n' %
                              _format_error(dict(
                                  split=dict(path=input_path),
                                  task_error=task_error)))
                    return

        # fallback if we can't find the error (e.g. the job does something
        # weird to stderr or stack traces)
        log.error('Error while reading from %s:\n' % input_path)
Code Example #7
File: local.py  Project: Yelp/mrjob
    def _log_cause_of_error(self, ex):
        if not isinstance(ex, _TaskFailedException):
            # if something went wrong inside mrjob, the stacktrace
            # will bubble up to the top level
            return

        # not using LogInterpretationMixin because it would be overkill

        if not self._opts['read_logs']:
            return

        input_path = self._task_input_path(
            ex.task_type, ex.step_num, ex.task_num)
        stderr_path = self._task_stderr_path(
            ex.task_type, ex.step_num, ex.task_num)

        if self.fs.exists(stderr_path):  # it should, but just to be safe
            # log-parsing code expects "str", not bytes; open in text mode
            with open(stderr_path) as stderr:
                task_error = _parse_task_stderr(stderr)
                if task_error:
                    task_error['path'] = stderr_path
                    log.error('Cause of failure:\n\n%s\n\n' %
                              _format_error(dict(
                                  split=dict(path=input_path),
                                  task_error=task_error)))
                    return

        # fallback if we can't find the error (e.g. the job does something
        # weird to stderr or stack traces)
        log.error('Error while reading from %s:\n' % input_path)
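Compared with example #6, this Yelp/mrjob version adds an early return when the read_logs option is disabled, so it skips log parsing entirely. Note how the error dict it builds combines a split (the task's input file) with a task_error (the parsed stderr); judging from the split and task_error tests elsewhere on this page, the logged message would plausibly mention both the input path ("while reading input from ...") and the task error with its stderr path, though that exact output is an inference, not something these examples verify.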
Code Example #8
File: test_errors.py  Project: yzhanggithub/mrjob
    def test_trim_spark_stacktrace(self):
        self.assertEqual(
            _format_error(
                dict(spark_error=dict(
                    message=_MULTI_LINE_ERROR[37:],
                    start_line=0,
                    num_lines=10,
                ))), _MULTI_LINE_ERROR[37:423])
Code Example #9
File: test_errors.py  Project: okomestudio/mrjob
    def test_task_error(self):
        self.assertEqual(
            _format_error(dict(task_error=dict(
                message='system will self-destruct in 5s'
            ))),
            'system will self-destruct in 5s')

        # everything uses the same code to format path + line range, so
        # don't worry about testing all the options each time
        self.assertEqual(
            _format_error(dict(task_error=dict(
                message='system will self-destruct in 5s',
                path='/path/to/stderr',
                start_line=0,
                num_lines=1))),
            'system will self-destruct in 5s'
            '\n\n(from line 1 of /path/to/stderr)')
Code Example #10
    def test_task_error(self):
        self.assertEqual(
            _format_error(
                dict(task_error=dict(
                    message='system will self-destruct in 5s'))),
            'system will self-destruct in 5s')

        # everything uses the same code to format path + line range, so
        # don't worry about testing all the options each time
        self.assertEqual(
            _format_error(
                dict(task_error=dict(message='system will self-destruct in 5s',
                                     path='/path/to/stderr',
                                     start_line=0,
                                     num_lines=1))),
            'system will self-destruct in 5s'
            '\n\n(from line 1 of /path/to/stderr)')
Code Example #11
File: test_error.py  Project: imtiaz39/mrjob
    def test_task_error(self):
        self.assertEqual(
            _format_error(dict(task_error=dict(message="system will self-destruct in 5s"))),
            "\n\ncaused by:\n\n" "system will self-destruct in 5s",
        )

        # everything uses the same code to format path + line range, so
        # don't worry about testing all the options each time
        self.assertEqual(
            _format_error(
                dict(
                    task_error=dict(
                        message="system will self-destruct in 5s", path="/path/to/stderr", start_line=0, num_lines=1
                    )
                )
            ),
            "\n\ncaused by:\n\n" "system will self-destruct in 5s" "\n\n(from line 1 of /path/to/stderr)",
        )
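Unlike examples #9 and #10, this fork's test expects a "\n\ncaused by:\n\n" header before the task error message even when the task_error stands alone, which suggests that _format_error in imtiaz39/mrjob always renders the task_error as a trailing "caused by" section rather than as the primary message.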
Code Example #12
    def _wait_for_step_to_complete(self, job_id, step_num, num_steps):
        """Helper for _wait_for_step_to_complete(). Wait for
        step with the given ID to complete, and fetch counters.
        If it fails, attempt to diagnose the error, and raise an
        exception.

        This also adds an item to self._log_interpretations
        """
        log_interpretation = dict(job_id=job_id)
        self._log_interpretations.append(log_interpretation)

        log_interpretation['step'] = {}
        step_type = self._get_step(step_num)['type']

        while True:
            # https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs#JobStatus  # noqa
            job = self._get_job(job_id)

            job_state = job.status.State.Name(job.status.state)

            log.info('%s => %s' % (job_id, job_state))

            log_interpretation['step']['driver_output_uri'] = (
                job.driver_output_resource_uri)

            self._interpret_step_logs(log_interpretation, step_type)

            progress = log_interpretation['step'].get('progress')
            if progress:
                log.info(' ' + progress['message'])

            # https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs#State  # noqa
            # these are the states covered by the ACTIVE job state matcher,
            # plus SETUP_DONE
            if job_state in ('PENDING', 'RUNNING', 'CANCEL_PENDING',
                             'SETUP_DONE'):
                self._wait_for_api('job completion')
                continue

            # print counters if job wasn't CANCELLED
            if job_state != 'CANCELLED':
                self._log_counters(log_interpretation, step_num)

            if job_state == 'ERROR':
                error = self._pick_error(log_interpretation, step_type)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n\n' %
                              _format_error(error))

            # we're done, will return at the end of this
            if job_state == 'DONE':
                break
            else:
                raise StepFailedException(step_num=step_num,
                                          num_steps=num_steps)
Code Example #13
File: dataproc.py  Project: Yelp/mrjob
    def _wait_for_step_to_complete(self, job_id, step_num, num_steps):
        """Helper for _wait_for_step_to_complete(). Wait for
        step with the given ID to complete, and fetch counters.
        If it fails, attempt to diagnose the error, and raise an
        exception.

        This also adds an item to self._log_interpretations
        """
        log_interpretation = dict(job_id=job_id)
        self._log_interpretations.append(log_interpretation)

        log_interpretation['step'] = {}
        step_type = self._get_step(step_num)['type']

        while True:
            # https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs#JobStatus  # noqa
            job = self._get_job(job_id)

            job_state = job.status.State.Name(job.status.state)

            log.info('%s => %s' % (job_id, job_state))

            log_interpretation['step']['driver_output_uri'] = (
                job.driver_output_resource_uri)

            self._interpret_step_logs(log_interpretation, step_type)

            progress = log_interpretation['step'].get('progress')
            if progress:
                log.info(' ' + progress['message'])

            # https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs#State  # noqa
            # these are the states covered by the ACTIVE job state matcher,
            # plus SETUP_DONE
            if job_state in ('PENDING', 'RUNNING',
                             'CANCEL_PENDING', 'SETUP_DONE'):
                self._wait_for_api('job completion')
                continue

            # print counters if job wasn't CANCELLED
            if job_state != 'CANCELLED':
                self._log_counters(log_interpretation, step_num)

            if job_state == 'ERROR':
                error = self._pick_error(log_interpretation, step_type)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n\n' %
                              _format_error(error))

            # we're done, will return at the end of this
            if job_state == 'DONE':
                break
            else:
                raise StepFailedException(
                    step_num=step_num, num_steps=num_steps)
Code Example #14
File: test_errors.py  Project: yzhanggithub/mrjob
    def test_spark_error(self):
        self.assertEqual(
            _format_error(
                dict(spark_error=dict(
                    message=
                    'Task attempt_20190829211242_0004_m_000000_0 aborted.',
                    start_line=0,
                    num_lines=1,
                ))), 'Task attempt_20190829211242_0004_m_000000_0 aborted.')

        self.assertEqual(
            _format_error(
                dict(spark_error=dict(
                    message=
                    'Task attempt_20190829211242_0004_m_000000_0 aborted.',
                    start_line=0,
                    num_lines=1,
                    path='/path/to/log'))),
            ('Task attempt_20190829211242_0004_m_000000_0 aborted.'
             '\n\n(from line 1 of /path/to/log)'))
Code Example #15
File: test_errors.py  Project: yzhanggithub/mrjob
    def test_spark_error_hides_other_errors(self):
        self.assertEqual(
            _format_error(
                dict(
                    hadoop_error=dict(
                        message='DevastatingJavaException',
                        path='history.jhist',
                        start_line=23,
                        num_lines=1,
                    ),
                    spark_error=dict(
                        message='Aborting task',
                        start_line=99,
                        num_lines=1,
                    ),
                    task_error=dict(message='system will self-destruct in 5s'),
                )), 'Aborting task')
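This test fixes the precedence among sub-errors: when a spark_error is present it is formatted on its own, and both the hadoop_error and the task_error are suppressed. A minimal sketch of that selection logic follows; the function name is a hypothetical stand-in, not mrjob's actual API.

def _pick_cause_of_error(error):
    """Return the message of the winning sub-error.

    Hypothetical reconstruction of the precedence the assertion above
    checks: spark_error shadows hadoop_error and task_error alike.
    """
    if error.get('spark_error'):
        return error['spark_error']['message']
    if error.get('hadoop_error'):
        return error['hadoop_error']['message']
    # task_error / split formatting would apply below this point
    return None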
Code Example #16
def main(cl_args=None):
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = {
        k: v
        for k, v in options.__dict__.items()
        if k not in ('quiet', 'verbose', 'step_id')
    }

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)

    if not step:
        raise SystemExit(1)

    if step['Status']['State'] != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step['Status']['State']))

    # interpret logs
    log.info('Diagnosing step %s (%s)' % (step['Id'], step['Name']))

    log_interpretation = dict(step_id=step['Id'])

    step_type = _infer_step_type(step)

    error = runner._pick_error(log_interpretation, step_type)

    # print error
    if error:
        log.error('Probable cause of failure:\n\n%s\n\n' %
                  _format_error(error))
    else:
        log.warning('No error detected')
Code Example #17
File: diagnose.py  Project: Affirm/mrjob
def main(cl_args=None):
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = {k: v for k, v in options.__dict__.items()
                     if k not in ('quiet', 'verbose', 'step_id')}

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)

    if not step:
        raise SystemExit(1)

    if step['Status']['State'] != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step['Status']['State']))

    # interpret logs
    log.info('Diagnosing step %s (%s)' % (step['Id'], step['Name']))

    log_interpretation = dict(step_id=step['Id'])

    step_type = _infer_step_type(step)

    error = runner._pick_error(log_interpretation, step_type)

    # print error
    if error:
        log.error('Probable cause of failure:\n\n%s\n\n' %
                  _format_error(error))
    else:
        log.warning('No error detected')
Code Example #18
File: test_errors.py  Project: Dean838/mrjob
    def test_fall_back_to_json(self):
        self.assertEqual(_format_error([]), '[]')
Code Example #19
    def test_fall_back_to_json(self):
        self.assertEqual(_format_error([]), '[]')
Code Example #20
    def test_empty(self):
        self.assertEqual(_format_error({}), '')
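Examples #18-#20 pin down the degenerate cases: a non-dict argument falls back to its JSON representation, and a dict with no recognized sub-errors formats to the empty string. A minimal sketch of that fallback, assuming (without evidence beyond these tests) that the function builds its result from optional parts:

import json

def _format_error_sketch(error):
    # non-dict errors fall back to JSON, so _format_error([]) == '[]'
    if not isinstance(error, dict):
        return json.dumps(error)
    parts = []
    # ...the spark_error / hadoop_error / task_error / split sections
    # seen in the other examples would be appended to parts here...
    return ''.join(parts)  # an empty dict therefore yields ''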
Code Example #21
    def _run_job_in_hadoop(self):
        for step_num, step in enumerate(self._get_steps()):
            self._warn_about_spark_archives(step)

            step_args = self._args_for_step(step_num)
            env = self._env_for_step(step_num)

            # log this *after* _args_for_step(), which can start a search
            # for the Hadoop streaming jar
            log.info('Running step %d of %d...' %
                     (step_num + 1, self._num_steps()))
            log.debug('> %s' % cmd_line(step_args))
            log.debug('  with environment: %r' % sorted(env.items()))

            log_interpretation = {}
            self._log_interpretations.append(log_interpretation)

            # try to use a PTY if it's available
            try:
                pid, master_fd = pty.fork()
            except (AttributeError, OSError):
                # no PTYs, just use Popen

                # user won't get much feedback for a while, so tell them
                # Hadoop is running
                log.debug('No PTY available, using Popen() to invoke Hadoop')

                step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)

                step_interpretation = _interpret_hadoop_jar_command_stderr(
                    step_proc.stderr, record_callback=_log_record_from_hadoop)

                # there shouldn't be much output to STDOUT
                for line in step_proc.stdout:
                    _log_line_from_hadoop(to_string(line).strip('\r\n'))

                step_proc.stdout.close()
                step_proc.stderr.close()

                returncode = step_proc.wait()
            else:
                # we have PTYs
                if pid == 0:  # we are the child process
                    os.execvpe(step_args[0], step_args, env)
                else:
                    log.debug('Invoking Hadoop via PTY')

                    with os.fdopen(master_fd, 'rb') as master:
                        # reading from master gives us the subprocess's
                        # stderr and stdout (it's a fake terminal)
                        step_interpretation = (
                            _interpret_hadoop_jar_command_stderr(
                                master,
                                record_callback=_log_record_from_hadoop))
                        _, returncode = os.waitpid(pid, 0)

            # make sure output_dir is filled
            if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._step_output_uri(step_num))

            log_interpretation['step'] = step_interpretation

            step_type = step['type']

            if not _is_spark_step_type(step_type):
                counters = self._pick_counters(log_interpretation, step_type)
                if counters:
                    log.info(_format_counters(counters))
                else:
                    log.warning('No counters found')

            if returncode:
                error = self._pick_error(log_interpretation, step_type)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n' %
                              _format_error(error))

                # use CalledProcessError's well-known message format
                reason = str(CalledProcessError(returncode, step_args))
                raise StepFailedException(reason=reason,
                                          step_num=step_num,
                                          num_steps=self._num_steps())
Code Example #22
    def _run_job_in_hadoop(self):
        for step_num in range(self._num_steps()):
            step_args = self._args_for_step(step_num)

            # log this *after* _args_for_step(), which can start a search
            # for the Hadoop streaming jar
            log.info('Running step %d of %d' %
                     (step_num + 1, self._num_steps()))
            log.debug('> %s' % cmd_line(step_args))

            log_interpretation = {}
            self._log_interpretations.append(log_interpretation)

            # try to use a PTY if it's available
            try:
                pid, master_fd = pty.fork()
            except (AttributeError, OSError):
                # no PTYs, just use Popen

                # user won't get much feedback for a while, so tell them
                # Hadoop is running
                log.debug('No PTY available, using Popen() to invoke Hadoop')

                step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)

                step_interpretation = _interpret_hadoop_jar_command_stderr(
                    step_proc.stderr, record_callback=_log_record_from_hadoop)

                # there shouldn't be much output to STDOUT
                for line in step_proc.stdout:
                    _log_line_from_hadoop(to_string(line).strip('\r\n'))

                step_proc.stdout.close()
                step_proc.stderr.close()

                returncode = step_proc.wait()
            else:
                # we have PTYs
                if pid == 0:  # we are the child process
                    os.execvp(step_args[0], step_args)
                else:
                    log.debug('Invoking Hadoop via PTY')

                    with os.fdopen(master_fd, 'rb') as master:
                        # reading from master gives us the subprocess's
                        # stderr and stdout (it's a fake terminal)
                        step_interpretation = (
                            _interpret_hadoop_jar_command_stderr(
                                master,
                                record_callback=_log_record_from_hadoop))
                        _, returncode = os.waitpid(pid, 0)

            # make sure output_dir is filled
            if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._hdfs_step_output_dir(step_num))

            log_interpretation['step'] = step_interpretation

            if 'counters' not in step_interpretation:
                log.info('Attempting to read counters from history log')
                self._interpret_history_log(log_interpretation)

            # just print counters for this one step
            self._print_counters(step_nums=[step_num])

            if returncode:
                error = self._pick_error(log_interpretation)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n' %
                              _format_error(error))

                raise CalledProcessError(returncode, step_args)
Code Example #23
File: test_errors.py  Project: Dean838/mrjob
    def test_empty(self):
        self.assertEqual(_format_error({}), '')
Code Example #24
File: hadoop.py  Project: okomestudio/mrjob
    def _run_job_in_hadoop(self):
        for step_num, step in enumerate(self._get_steps()):
            self._warn_about_spark_archives(step)

            step_args = self._args_for_step(step_num)
            env = _fix_env(self._env_for_step(step_num))

            # log this *after* _args_for_step(), which can start a search
            # for the Hadoop streaming jar
            log.info('Running step %d of %d...' %
                     (step_num + 1, self._num_steps()))
            log.debug('> %s' % cmd_line(step_args))
            log.debug('  with environment: %r' % sorted(env.items()))

            log_interpretation = {}
            self._log_interpretations.append(log_interpretation)

            # try to use a PTY if it's available
            try:
                pid, master_fd = pty.fork()
            except (AttributeError, OSError):
                # no PTYs, just use Popen

                # user won't get much feedback for a while, so tell them
                # Hadoop is running
                log.debug('No PTY available, using Popen() to invoke Hadoop')

                step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)

                step_interpretation = _interpret_hadoop_jar_command_stderr(
                    step_proc.stderr,
                    record_callback=_log_record_from_hadoop)

                # there shouldn't be much output to STDOUT
                for line in step_proc.stdout:
                    _log_line_from_hadoop(to_unicode(line).strip('\r\n'))

                step_proc.stdout.close()
                step_proc.stderr.close()

                returncode = step_proc.wait()
            else:
                # we have PTYs
                if pid == 0:  # we are the child process
                    os.execvpe(step_args[0], step_args, env)
                else:
                    log.debug('Invoking Hadoop via PTY')

                    with os.fdopen(master_fd, 'rb') as master:
                        # reading from master gives us the subprocess's
                        # stderr and stdout (it's a fake terminal)
                        step_interpretation = (
                            _interpret_hadoop_jar_command_stderr(
                                master,
                                record_callback=_log_record_from_hadoop))
                        _, returncode = os.waitpid(pid, 0)

            # make sure output_dir is filled
            if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._step_output_uri(step_num))

            log_interpretation['step'] = step_interpretation

            step_type = step['type']

            if not _is_spark_step_type(step_type):
                counters = self._pick_counters(log_interpretation, step_type)
                if counters:
                    log.info(_format_counters(counters))
                else:
                    log.warning('No counters found')

            if returncode:
                error = self._pick_error(log_interpretation, step_type)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n' %
                              _format_error(error))

                # use CalledProcessError's well-known message format
                reason = str(CalledProcessError(returncode, step_args))
                raise StepFailedException(
                    reason=reason, step_num=step_num,
                    num_steps=self._num_steps())
Code Example #25
File: hadoop.py  Project: imtiaz39/mrjob
    def _run_job_in_hadoop(self):
        for step_num in range(self._num_steps()):
            step_args = self._args_for_step(step_num)

            # log this *after* _args_for_step(), which can start a search
            # for the Hadoop streaming jar
            log.info('Running step %d of %d' %
                     (step_num + 1, self._num_steps()))
            log.debug('> %s' % cmd_line(step_args))

            log_interpretation = {}
            self._log_interpretations.append(log_interpretation)

            # try to use a PTY if it's available
            try:
                pid, master_fd = pty.fork()
            except (AttributeError, OSError):
                # no PTYs, just use Popen

                # user won't get much feedback for a while, so tell them
                # Hadoop is running
                log.debug('No PTY available, using Popen() to invoke Hadoop')

                step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)

                step_interpretation = _interpret_hadoop_jar_command_stderr(
                    step_proc.stderr,
                    record_callback=_log_record_from_hadoop)

                # there shouldn't be much output to STDOUT
                for line in step_proc.stdout:
                    _log_line_from_hadoop(to_string(line).strip('\r\n'))

                step_proc.stdout.close()
                step_proc.stderr.close()

                returncode = step_proc.wait()
            else:
                # we have PTYs
                if pid == 0:  # we are the child process
                    os.execvp(step_args[0], step_args)
                else:
                    log.debug('Invoking Hadoop via PTY')

                    with os.fdopen(master_fd, 'rb') as master:
                        # reading from master gives us the subprocess's
                        # stderr and stdout (it's a fake terminal)
                        step_interpretation = (
                            _interpret_hadoop_jar_command_stderr(
                                master,
                                record_callback=_log_record_from_hadoop))
                        _, returncode = os.waitpid(pid, 0)

            # make sure output_dir is filled
            if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._hdfs_step_output_dir(step_num))

            log_interpretation['step'] = step_interpretation

            if 'counters' not in step_interpretation:
                log.info('Attempting to read counters from history log')
                self._interpret_history_log(log_interpretation)

            # just print counters for this one step
            self._print_counters(step_nums=[step_num])

            if returncode:
                error = self._pick_error(log_interpretation)
                if error:
                    log.error('Probable cause of failure:\n\n%s\n' %
                              _format_error(error))

                raise CalledProcessError(returncode, step_args)