def _run_hadoop(self, hadoop_args, env, record_callback):
    # try to use a PTY if it's available
    try:
        pid, master_fd = pty.fork()
    except (AttributeError, OSError):
        # no PTYs, just use Popen

        # user won't get much feedback for a while, so tell them
        # Hadoop is running
        log.debug('No PTY available, using Popen() to invoke Hadoop')

        step_proc = Popen(hadoop_args, stdout=PIPE, stderr=PIPE, env=env)

        step_interpretation = _interpret_hadoop_jar_command_stderr(
            step_proc.stderr,
            record_callback=_log_record_from_hadoop)

        # there shouldn't be much output to STDOUT
        for line in step_proc.stdout:
            _log_line_from_driver(to_unicode(line).strip('\r\n'))

        step_proc.stdout.close()
        step_proc.stderr.close()

        returncode = step_proc.wait()
    else:
        # we have PTYs
        if pid == 0:  # we are the child process
            try:
                os.execvpe(hadoop_args[0], hadoop_args, env)
                # now we are no longer Python
            except OSError as ex:
                # use _exit() so we don't do cleanup, etc. that's
                # the parent process's job
                os._exit(ex.errno)
            finally:
                # if we got some other exception, still exit hard
                os._exit(-1)
        else:
            log.debug('Invoking Hadoop via PTY')

            with os.fdopen(master_fd, 'rb') as master:
                # reading from master gives us the subprocess's
                # stderr and stdout (it's a fake terminal)
                step_interpretation = (
                    _interpret_hadoop_jar_command_stderr(
                        _eio_to_eof(master),
                        record_callback=_log_record_from_hadoop))
                _, returncode = os.waitpid(pid, 0)

    return returncode, step_interpretation

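# For context: on Linux, reading from a PTY master raises an I/O error
# with errno.EIO once the child closes its side of the terminal, so the
# wrapper used above must treat EIO as a normal end-of-file (the
# test_treat_eio_as_eof case below exercises the same behavior). A
# minimal sketch of what an _eio_to_eof() helper could look like; the
# name comes from the call above, but this body is an illustration, not
# necessarily the library's implementation:
import errno


def _eio_to_eof(fileobj):
    """Yield lines from *fileobj*, treating EIO as end-of-file."""
    try:
        for line in fileobj:
            yield line
    except (IOError, OSError) as ex:
        # a PTY master raises EIO at EOF; anything else is a real
        # error and should propagate
        if ex.errno != errno.EIO:
            raise
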
def test_lines_can_be_bytes(self):
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr([
            b'15/12/11 13:33:11 INFO mapreduce.Job:'
            b' Running job: job_1449857544442_0002\n'
        ]),
        dict(job_id='job_1449857544442_0002'))

def test_yarn_error(self):
    lines = [
        '16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
        ' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
        'Error: java.lang.RuntimeException: PipeMapRed'
        '.waitOutputThreads(): subprocess failed with code 1\n',
        '\tat org.apache.hadoop.streaming.PipeMapRed'
        '.waitOutputThreads(PipeMapRed.java:330)\n',
        '\tat org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
        '(PipeMapRed.java:543)\n',
        '\n',
    ]

    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(lines),
        dict(errors=[
            dict(
                attempt_id='attempt_1453488173054_0001_m_000000_0',
                hadoop_error=dict(
                    message=(
                        'Error: java.lang.RuntimeException: PipeMapRed'
                        '.waitOutputThreads(): subprocess failed with'
                        ' code 1\n\tat org.apache.hadoop.streaming'
                        '.PipeMapRed.waitOutputThreads(PipeMapRed.java'
                        ':330)\n\tat org.apache.hadoop.streaming'
                        '.PipeMapRed.mapRedFinished(PipeMapRed.java'
                        ':543)'),
                    num_lines=5,
                    start_line=0,
                ),
                # task ID is implied by attempt ID
                task_id='task_1453488173054_0001_m_000000',
            )
        ]))

def test_yarn_error_without_exception(self):
    # when there's no exception, just use the whole line as the message
    lines = [
        '16/01/22 19:14:16 INFO mapreduce.Job: Task Id :'
        ' attempt_1453488173054_0001_m_000000_0, Status : FAILED\n',
    ]

    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(lines),
        dict(errors=[
            dict(
                attempt_id='attempt_1453488173054_0001_m_000000_0',
                hadoop_error=dict(
                    message=('Task Id :'
                             ' attempt_1453488173054_0001_m_000000_0,'
                             ' Status : FAILED'),
                    num_lines=1,
                    start_line=0,
                ),
                # task ID is implied by attempt ID
                task_id='task_1453488173054_0001_m_000000',
            )
        ]))

def test_record_callback(self):
    records = []

    def record_callback(record):
        records.append(record)

    lines = [
        'packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
        '-unjar7873615084086492115/] []'
        ' /tmp/streamjob737002412080260811.jar tmpDir=null\n',
        '15/12/11 13:33:11 INFO mapreduce.Job:'
        ' Running job: job_1449857544442_0002\n',
        'Streaming Command Failed!\n',
    ]

    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(
            lines, record_callback=record_callback),
        dict(job_id='job_1449857544442_0002'))

    self.assertEqual(
        records,
        [
            dict(
                caller_location='',
                level='',
                logger='',
                message=('packageJobJar: [/mnt/var/lib/hadoop/tmp/hadoop'
                         '-unjar7873615084086492115/] []'
                         ' /tmp/streamjob737002412080260811.jar'
                         ' tmpDir=null'),
                num_lines=1,
                start_line=0,
                thread='',
                timestamp='',
            ),
            dict(
                caller_location='',
                level='INFO',
                logger='mapreduce.Job',
                message='Running job: job_1449857544442_0002',
                num_lines=1,
                start_line=1,
                thread='',
                timestamp='15/12/11 13:33:11',
            ),
            dict(
                caller_location='',
                level='',
                logger='',
                message='Streaming Command Failed!',
                num_lines=1,
                start_line=2,
                thread='',
                timestamp='',
            ),
        ])

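# The record dicts passed to the callback have the shape asserted above
# ('level', 'logger', 'message', 'timestamp', and so on, with empty
# strings for fields that couldn't be parsed). A minimal sketch of a
# callback in the spirit of the _log_record_from_hadoop used by the
# runner code in this file; log_parsed_record is a hypothetical
# stand-in, not the library's actual implementation:
import logging

log = logging.getLogger(__name__)


def log_parsed_record(record):
    # re-log the parsed message at its original level, falling back
    # to INFO when no level was parsed (empty string)
    level = getattr(logging, record.get('level') or 'INFO', logging.INFO)
    log.log(level, '%s', record['message'])
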
def test_treat_eio_as_eof(self):
    def yield_lines():
        yield ('15/12/11 13:33:11 INFO mapreduce.Job:'
               ' Running job: job_1449857544442_0002\n')
        e = IOError()
        e.errno = errno.EIO
        raise e

    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(yield_lines()),
        dict(job_id='job_1449857544442_0002'))

def test_infer_job_id_from_application_id(self):
    lines = [
        '15/12/11 13:32:45 INFO impl.YarnClientImpl:'
        ' Submitted application application_1449857544442_0002\n',
    ]

    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(lines),
        dict(application_id='application_1449857544442_0002',
             job_id='job_1449857544442_0002'))

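# A YARN job ID is the matching application ID with the prefix swapped:
# both embed the same cluster timestamp and sequence number. A one-line
# illustration of the inference the test above relies on
# (job_id_from_application_id is a hypothetical helper name):
def job_id_from_application_id(application_id):
    return 'job_' + application_id[len('application_'):]
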
def test_pre_yarn(self):
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(PRE_YARN_STEP_LOG_LINES),
        PARSED_PRE_YARN_STEP_LOG_LINES)

def test_empty(self):
    self.assertEqual(_interpret_hadoop_jar_command_stderr([]), {})

def _run_job_in_hadoop(self):
    for step_num, step in enumerate(self._get_steps()):
        self._warn_about_spark_archives(step)

        step_args = self._args_for_step(step_num)
        env = self._env_for_step(step_num)

        # log this *after* _args_for_step(), which can start a search
        # for the Hadoop streaming jar
        log.info('Running step %d of %d...' %
                 (step_num + 1, self._num_steps()))
        log.debug('> %s' % cmd_line(step_args))
        log.debug('  with environment: %r' % sorted(env.items()))

        log_interpretation = {}
        self._log_interpretations.append(log_interpretation)

        # try to use a PTY if it's available
        try:
            pid, master_fd = pty.fork()
        except (AttributeError, OSError):
            # no PTYs, just use Popen

            # user won't get much feedback for a while, so tell them
            # Hadoop is running
            log.debug('No PTY available, using Popen() to invoke Hadoop')

            step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)

            step_interpretation = _interpret_hadoop_jar_command_stderr(
                step_proc.stderr,
                record_callback=_log_record_from_hadoop)

            # there shouldn't be much output to STDOUT
            for line in step_proc.stdout:
                _log_line_from_hadoop(to_string(line).strip('\r\n'))

            step_proc.stdout.close()
            step_proc.stderr.close()

            returncode = step_proc.wait()
        else:
            # we have PTYs
            if pid == 0:  # we are the child process
                os.execvpe(step_args[0], step_args, env)
            else:
                log.debug('Invoking Hadoop via PTY')

                with os.fdopen(master_fd, 'rb') as master:
                    # reading from master gives us the subprocess's
                    # stderr and stdout (it's a fake terminal)
                    step_interpretation = (
                        _interpret_hadoop_jar_command_stderr(
                            master,
                            record_callback=_log_record_from_hadoop))
                    _, returncode = os.waitpid(pid, 0)

        # make sure output_dir is filled
        if 'output_dir' not in step_interpretation:
            step_interpretation['output_dir'] = (
                self._step_output_uri(step_num))

        log_interpretation['step'] = step_interpretation

        step_type = step['type']

        if not _is_spark_step_type(step_type):
            counters = self._pick_counters(log_interpretation, step_type)
            if counters:
                log.info(_format_counters(counters))
            else:
                log.warning('No counters found')

        if returncode:
            error = self._pick_error(log_interpretation, step_type)
            if error:
                log.error('Probable cause of failure:\n\n%s\n' %
                          _format_error(error))

            # use CalledProcessError's well-known message format
            reason = str(CalledProcessError(returncode, step_args))
            raise StepFailedException(
                reason=reason, step_num=step_num,
                num_steps=self._num_steps())

def test_yarn(self):
    self.assertEqual(
        _interpret_hadoop_jar_command_stderr(YARN_STEP_LOG_LINES),
        PARSED_YARN_STEP_LOG_LINES)

def _run_job_in_hadoop(self):
    for step_num in range(self._num_steps()):
        step_args = self._args_for_step(step_num)

        # log this *after* _args_for_step(), which can start a search
        # for the Hadoop streaming jar
        log.info('Running step %d of %d' %
                 (step_num + 1, self._num_steps()))
        log.debug('> %s' % cmd_line(step_args))

        log_interpretation = {}
        self._log_interpretations.append(log_interpretation)

        # try to use a PTY if it's available
        try:
            pid, master_fd = pty.fork()
        except (AttributeError, OSError):
            # no PTYs, just use Popen

            # user won't get much feedback for a while, so tell them
            # Hadoop is running
            log.debug('No PTY available, using Popen() to invoke Hadoop')

            step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)

            step_interpretation = _interpret_hadoop_jar_command_stderr(
                step_proc.stderr,
                record_callback=_log_record_from_hadoop)

            # there shouldn't be much output to STDOUT
            for line in step_proc.stdout:
                _log_line_from_hadoop(to_string(line).strip('\r\n'))

            step_proc.stdout.close()
            step_proc.stderr.close()

            returncode = step_proc.wait()
        else:
            # we have PTYs
            if pid == 0:  # we are the child process
                os.execvp(step_args[0], step_args)
            else:
                log.debug('Invoking Hadoop via PTY')

                with os.fdopen(master_fd, 'rb') as master:
                    # reading from master gives us the subprocess's
                    # stderr and stdout (it's a fake terminal)
                    step_interpretation = (
                        _interpret_hadoop_jar_command_stderr(
                            master,
                            record_callback=_log_record_from_hadoop))
                    _, returncode = os.waitpid(pid, 0)

        # make sure output_dir is filled
        if 'output_dir' not in step_interpretation:
            step_interpretation['output_dir'] = (
                self._hdfs_step_output_dir(step_num))

        log_interpretation['step'] = step_interpretation

        if 'counters' not in step_interpretation:
            log.info('Attempting to read counters from history log')
            self._interpret_history_log(log_interpretation)

        # just print counters for this one step
        self._print_counters(step_nums=[step_num])

        if returncode:
            error = self._pick_error(log_interpretation)
            if error:
                log.error('Probable cause of failure:\n\n%s\n' %
                          _format_error(error))

            raise CalledProcessError(returncode, step_args)

def _run_job_in_hadoop(self):
    for step_num, step in enumerate(self._get_steps()):
        self._warn_about_spark_archives(step)

        step_args = self._args_for_step(step_num)
        env = _fix_env(self._env_for_step(step_num))

        # log this *after* _args_for_step(), which can start a search
        # for the Hadoop streaming jar
        log.info('Running step %d of %d...' %
                 (step_num + 1, self._num_steps()))
        log.debug('> %s' % cmd_line(step_args))
        log.debug('  with environment: %r' % sorted(env.items()))

        log_interpretation = {}
        self._log_interpretations.append(log_interpretation)

        # try to use a PTY if it's available
        try:
            pid, master_fd = pty.fork()
        except (AttributeError, OSError):
            # no PTYs, just use Popen

            # user won't get much feedback for a while, so tell them
            # Hadoop is running
            log.debug('No PTY available, using Popen() to invoke Hadoop')

            step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE, env=env)

            step_interpretation = _interpret_hadoop_jar_command_stderr(
                step_proc.stderr,
                record_callback=_log_record_from_hadoop)

            # there shouldn't be much output to STDOUT
            for line in step_proc.stdout:
                _log_line_from_hadoop(to_unicode(line).strip('\r\n'))

            step_proc.stdout.close()
            step_proc.stderr.close()

            returncode = step_proc.wait()
        else:
            # we have PTYs
            if pid == 0:  # we are the child process
                os.execvpe(step_args[0], step_args, env)
            else:
                log.debug('Invoking Hadoop via PTY')

                with os.fdopen(master_fd, 'rb') as master:
                    # reading from master gives us the subprocess's
                    # stderr and stdout (it's a fake terminal)
                    step_interpretation = (
                        _interpret_hadoop_jar_command_stderr(
                            master,
                            record_callback=_log_record_from_hadoop))
                    _, returncode = os.waitpid(pid, 0)

        # make sure output_dir is filled
        if 'output_dir' not in step_interpretation:
            step_interpretation['output_dir'] = (
                self._step_output_uri(step_num))

        log_interpretation['step'] = step_interpretation

        step_type = step['type']

        if not _is_spark_step_type(step_type):
            counters = self._pick_counters(log_interpretation, step_type)
            if counters:
                log.info(_format_counters(counters))
            else:
                log.warning('No counters found')

        if returncode:
            error = self._pick_error(log_interpretation, step_type)
            if error:
                log.error('Probable cause of failure:\n\n%s\n' %
                          _format_error(error))

            # use CalledProcessError's well-known message format
            reason = str(CalledProcessError(returncode, step_args))
            raise StepFailedException(
                reason=reason, step_num=step_num,
                num_steps=self._num_steps())