Example #1
    def _run_step_on_spark(self, step, step_num):
        if self._opts['upload_archives']:
            log.warning('Spark master %r will probably ignore archives' %
                        self._spark_master())

        step_args = self._args_for_spark_step(step_num)

        env = dict(os.environ)
        env.update(self._spark_cmdenv(step_num))

        log.debug('> %s' % cmd_line(step_args))
        log.debug('  with environment: %r' % sorted(env.items()))

        def _log_line(line):
            log.info('  %s' % to_unicode(line).strip('\r\n'))

        # try to use a PTY if it's available
        try:
            pid, master_fd = pty.fork()
        except (AttributeError, OSError):
            # no PTYs, just use Popen

            # user won't get much feedback for a while, so tell them
            # spark-submit is running
            log.debug('No PTY available, using Popen() to invoke spark-submit')

            step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)

            # parse log4j records from STDERR and log each one
            for record in _parse_hadoop_log4j_records(
                    _yield_lines_from_pty_or_pipe(step_proc.stderr)):
                _log_log4j_record(record)

            # there shouldn't be much output on STDOUT
            for line in step_proc.stdout:
                _log_line_from_driver(to_unicode(line).strip('\r\n'))

            step_proc.stdout.close()
            step_proc.stderr.close()

            returncode = step_proc.wait()
        else:
            # we have PTYs
            if pid == 0:  # we are the child process
                os.execvpe(step_args[0], step_args, env)
            else:
                log.debug('Invoking spark-submit via PTY')

                with os.fdopen(master_fd, 'rb') as master:
                    for record in _parse_hadoop_log4j_records(
                            _yield_lines_from_pty_or_pipe(master)):
                        _log_log4j_record(record)
                    _, returncode = os.waitpid(pid, 0)

        if returncode:
            reason = str(CalledProcessError(returncode, step_args))
            raise StepFailedException(reason=reason,
                                      step_num=step_num,
                                      num_steps=self._num_steps())
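
Example #1 streams spark-submit output through a PTY when one is available and falls back to plain pipes otherwise. Below is a minimal, self-contained sketch of that fallback pattern using only the standard library; run_with_pty_fallback and line_callback are hypothetical names, not mrjob APIs, and the exit-code handling assumes Python 3.9+ for os.waitstatus_to_exitcode.

# a minimal sketch (not mrjob code): run a command, preferring a PTY,
# falling back to pipes when PTYs are unavailable
import os
import pty
from subprocess import Popen, PIPE


def run_with_pty_fallback(args, env=None, line_callback=print):
    """Run *args*, passing each output line to *line_callback*; return
    the exit code. Prefer a PTY so the child flushes output promptly."""
    env = dict(os.environ, **(env or {}))

    try:
        pid, master_fd = pty.fork()
    except (AttributeError, OSError):
        # no PTY support (e.g. Windows): fall back to plain pipes
        proc = Popen(args, stdout=PIPE, stderr=PIPE, env=env)

        for line in proc.stderr:
            line_callback(line.decode(errors='replace').rstrip('\r\n'))
        # usually far less output on stdout
        for line in proc.stdout:
            line_callback(line.decode(errors='replace').rstrip('\r\n'))

        proc.stdout.close()
        proc.stderr.close()
        return proc.wait()

    if pid == 0:
        # child process: replace ourselves with the command
        os.execvpe(args[0], args, env)
    else:
        # parent: the PTY master carries the child's stdout and stderr
        with os.fdopen(master_fd, 'rb') as master:
            try:
                for line in master:
                    line_callback(line.decode(errors='replace').rstrip('\r\n'))
            except OSError:
                pass  # Linux raises EIO on the master once the child exits
        _, status = os.waitpid(pid, 0)
        return os.waitstatus_to_exitcode(status)  # Python 3.9+


if __name__ == '__main__':
    exit_code = run_with_pty_fallback(['echo', 'hello'])
    print('exit code:', exit_code)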
Example #2
File: hadoop.py Project: dasalgadoc/mrjob
def _log_record_from_hadoop(record):
    """Log log4j record parsed from hadoop stderr."""
    if not _is_counter_log4j_record(record):  # counters are printed separately
        _log_log4j_record(record)
Example #3
File: hadoop.py Project: Affirm/mrjob
def _log_record_from_hadoop(record):
    """Log log4j record parsed from hadoop stderr."""
    if not _is_counter_log4j_record(record):  # counters are printed separately
        _log_log4j_record(record)
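
Examples #2 and #3 show the same filtering step: counter records are skipped here because counters are printed separately. A rough, runnable sketch of that idea with stand-in helpers (the record shape and _is_counter_record below are assumptions for illustration, not mrjob's actual parser output):

import logging

log = logging.getLogger(__name__)


def _is_counter_record(record):
    # stand-in for _is_counter_log4j_record(): guess from the message text
    return record['message'].lstrip().startswith('Counters:')


def _log_record(record):
    # stand-in for _log_log4j_record(): log at the record's own level
    level = getattr(logging, record.get('level', 'INFO'), logging.INFO)
    log.log(level, record['message'])


def log_record_from_hadoop(record):
    """Mirror of the examples above: counters are handled elsewhere."""
    if not _is_counter_record(record):
        _log_record(record)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(levelname)s %(message)s')
    log_record_from_hadoop({'level': 'INFO', 'message': 'Running job: job_1'})
    log_record_from_hadoop({'level': 'INFO', 'message': 'Counters: 3'})  # skipped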