def test_error_followed_by_subprocess_failed_stack_trace(self):
    """The Python traceback wins over the Java stack trace after it.

    Real example, from #1430.
    """
    stderr_lines = [
        'Traceback (most recent call last):',
        ' File "mr_boom.py", line 10, in <module>',
        ' MRBoom.run()',
        # ...
        'Exception: BOOM',
        ('java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
         ' subprocess failed with code 1'),
        (' at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads'
         '(PipeMapRed.java:372)'),
        (' at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
         '(PipeMapRed.java:586)'),
        # ...
    ]

    expected = dict(
        message=('Traceback (most recent call last):\n'
                 ' File "mr_boom.py", line 10, in <module>\n'
                 ' MRBoom.run()\n'
                 'Exception: BOOM'),
        start_line=0,
        num_lines=4,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def _log_cause_of_error(self, ex):
    """Log the probable cause of a failed task, if we can find one.

    Does nothing unless *ex* is a ``_TaskFailedException``; anything
    else means something went wrong inside mrjob, and that stacktrace
    will bubble up to the top level on its own.
    """
    if not isinstance(ex, _TaskFailedException):
        return

    # not using LogInterpretationMixin because it would be overkill
    input_path = self._task_input_path(
        ex.task_type, ex.step_num, ex.task_num)
    stderr_path = self._task_stderr_path(
        ex.task_type, ex.step_num, ex.task_num)

    if self.fs.exists(stderr_path):  # it should, but just to be safe
        # log-parsing code expects "str", not bytes; open in text mode
        with open(stderr_path) as stderr:
            task_error = _parse_task_stderr(stderr)

        if task_error:
            task_error['path'] = stderr_path
            error = dict(
                split=dict(path=input_path),
                task_error=task_error)
            log.error('Cause of failure:\n\n%s\n\n' % _format_error(error))
            return

    # fallback if we can't find the error (e.g. the job does something
    # weird to stderr or stack traces)
    log.error('Error while reading from %s:\n' % input_path)
def test_error_without_leading_plus(self):
    """An ERROR: line plus its follow-up is reported as the message."""
    stderr_lines = [
        "ERROR: something is terribly, terribly wrong\n",
        "OH THE HORROR\n",
    ]

    expected = dict(
        message=("ERROR: something is terribly, terribly wrong\n"
                 "OH THE HORROR"),
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def test_strip_carriage_return(self):
    """Carriage-return line endings are normalized in the message."""
    stderr_lines = [
        "+ rm /\r\n",
        "rm: cannot remove ‘/’: Is a directory\r\n",
    ]

    expected = dict(
        message="+ rm /\nrm: cannot remove ‘/’: Is a directory",
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def _log_cause_of_error(self, ex):
    """Log the probable cause of a failed task, if we can find one.

    Does nothing unless *ex* is a ``_TaskFailedException`` (anything
    else means something went wrong inside mrjob, and that stacktrace
    will bubble up to the top level on its own), or if the
    *read_logs* opt is disabled.
    """
    if not isinstance(ex, _TaskFailedException):
        return

    # respect the read_logs opt
    if not self._opts['read_logs']:
        return

    # not using LogInterpretationMixin because it would be overkill
    input_path = self._task_input_path(
        ex.task_type, ex.step_num, ex.task_num)
    stderr_path = self._task_stderr_path(
        ex.task_type, ex.step_num, ex.task_num)

    if self.fs.exists(stderr_path):  # it should, but just to be safe
        # log-parsing code expects "str", not bytes; open in text mode
        with open(stderr_path) as stderr:
            task_error = _parse_task_stderr(stderr)

        if task_error:
            task_error['path'] = stderr_path
            error = dict(
                split=dict(path=input_path),
                task_error=task_error)
            log.error('Cause of failure:\n\n%s\n\n' % _format_error(error))
            return

    # fallback if we can't find the error (e.g. the job does something
    # weird to stderr or stack traces)
    log.error('Error while reading from %s:\n' % input_path)
def test_error_followed_by_subprocess_failed_stack_trace(self):
    """Pick the Python traceback, not the trailing Java stack trace.

    Real example, from #1430.
    """
    err_lines = [
        'Traceback (most recent call last):',
        ' File "mr_boom.py", line 10, in <module>',
        ' MRBoom.run()',
        # ...
        'Exception: BOOM',
        ('java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
         ' subprocess failed with code 1'),
        (' at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads'
         '(PipeMapRed.java:372)'),
        (' at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
         '(PipeMapRed.java:586)'),
        # ...
    ]

    # only the first four lines (the traceback) make up the message
    want = dict(
        message=('Traceback (most recent call last):\n'
                 ' File "mr_boom.py", line 10, in <module>\n'
                 ' MRBoom.run()\n'
                 'Exception: BOOM'),
        start_line=0,
        num_lines=4,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_log4j_init_warnings(self):
    """log4j startup warnings alone don't count as a task error."""
    stderr_lines = [
        ("log4j:WARN No appenders could be found for logger"
         " (amazon.emr.metrics.MetricsSaver).\n"),
        "log4j:WARN Please initialize the log4j system properly.\n",
        ("log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html"
         "#noconfig for more info.\n"),
    ]

    self.assertIsNone(_parse_task_stderr(stderr_lines))
def test_log4j_init_warnings(self):
    """Nothing but log4j init warnings means no error to report."""
    err_lines = [
        ('log4j:WARN No appenders could be found for logger'
         ' (amazon.emr.metrics.MetricsSaver).\n'),
        'log4j:WARN Please initialize the log4j system properly.\n',
        ('log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html'
         '#noconfig for more info.\n'),
    ]

    self.assertIsNone(_parse_task_stderr(err_lines))
def test_silent_bad_actor(self):
    """A command that fails without output still yields a message."""
    expected = dict(message='+ false', start_line=0, num_lines=1)

    self.assertEqual(_parse_task_stderr(['+ false\n']), expected)
def test_silent_bad_actor(self):
    """With no error output, the bare command line is the message."""
    err_lines = ['+ false\n']

    want = dict(
        message='+ false',
        start_line=0,
        num_lines=1,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_python_exception(self):
    """A Python traceback plus its command line becomes the message."""
    stderr_lines = [
        "+ python mr_boom.py --step-num=0 --mapper\n",
        "Traceback (most recent call last):\n",
        ' File "mr_boom.py", line 10, in <module>\n',
        " MRBoom.run()\n",
        "Exception: BOOM\n",
    ]

    # the message is every input line, minus the trailing newline
    expected = dict(
        message="".join(stderr_lines).rstrip("\n"),
        start_line=0,
        num_lines=5,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def test_strip_carriage_return(self):
    """CRLF line endings are stripped down to plain newlines."""
    err_lines = [
        '+ rm /\r\n',
        'rm: cannot remove ‘/’: Is a directory\r\n',
    ]

    want = dict(
        message='+ rm /\nrm: cannot remove ‘/’: Is a directory',
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_error_with_log4j_init_warnings(self):
    """Trailing log4j warnings are excluded from the error message."""
    stderr_lines = [
        "ERROR: something is terribly, terribly wrong\n",
        "OH THE HORROR\n",
        ("log4j:WARN No appenders could be found for logger"
         " (amazon.emr.metrics.MetricsSaver).\n"),
        "log4j:WARN Please initialize the log4j system properly.\n",
        ("log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html"
         "#noconfig for more info.\n"),
    ]

    # only the first two lines make it into the message
    expected = dict(
        message=("ERROR: something is terribly, terribly wrong\n"
                 "OH THE HORROR"),
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def test_subprocess_failed_stack_trace(self):
    """A bare Java 'subprocess failed' stack trace isn't a task error.

    Real example, with fanciful error code.
    """
    err_lines = [
        ('java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
         ' subprocess failed with code ^^vv<><>BA'),
        (' at org.apache.hadoop.streaming.PipeMapRed.waitOutputThreads'
         '(PipeMapRed.java:372)'),
        (' at org.apache.hadoop.streaming.PipeMapRed.mapRedFinished'
         '(PipeMapRed.java:586)'),
        # ...
    ]

    self.assertIsNone(_parse_task_stderr(err_lines))
def test_error_without_leading_plus(self):
    """An ERROR: line and its follow-up line form the message."""
    err_lines = [
        'ERROR: something is terribly, terribly wrong\n',
        'OH THE HORROR\n',
    ]

    want = dict(
        message=('ERROR: something is terribly, terribly wrong\n'
                 'OH THE HORROR'),
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_error_without_leading_plus(self):
    """Lines starting with ERROR: are picked up even without a '+'."""
    stderr_lines = [
        'ERROR: something is terribly, terribly wrong\n',
        'OH THE HORROR\n',
    ]

    expected = dict(
        message=('ERROR: something is terribly, terribly wrong\n'
                 'OH THE HORROR'),
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def test_python_exception(self):
    """The whole Python traceback, command line included, is reported."""
    err_lines = [
        '+ python mr_boom.py --step-num=0 --mapper\n',
        'Traceback (most recent call last):\n',
        ' File "mr_boom.py", line 10, in <module>\n',
        ' MRBoom.run()\n',
        'Exception: BOOM\n',
    ]

    # the message is all five lines joined, minus the final newline
    want = dict(
        message=''.join(err_lines).rstrip('\n'),
        start_line=0,
        num_lines=5,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_error_with_log4j_init_warnings(self):
    """log4j warnings after a real error are left out of the message."""
    err_lines = [
        'ERROR: something is terribly, terribly wrong\n',
        'OH THE HORROR\n',
        ('log4j:WARN No appenders could be found for logger'
         ' (amazon.emr.metrics.MetricsSaver).\n'),
        'log4j:WARN Please initialize the log4j system properly.\n',
        ('log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html'
         '#noconfig for more info.\n'),
    ]

    # only the first two lines count toward the error
    want = dict(
        message=('ERROR: something is terribly, terribly wrong\n'
                 'OH THE HORROR'),
        start_line=0,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_setup_command_error(self):
    """Only the failing setup command and its output are reported."""
    stderr_lines = [
        ("+ __mrjob_PWD=/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache"
         "/hadoop/appcache/application_1453488173054_0002"
         "/container_1453488173054_0002_01_000005\n"),
        "+ exec\n",
        "+ python3 -c 'import fcntl; fcntl.flock(9, fcntl.LOCK_EX)\n",
        ("+ export PYTHONPATH=/mnt/var/lib/hadoop/tmp/nm-local-dir"
         "/usercache/hadoop/appcache/application_1453488173054_0002"
         "/container_1453488173054_0002_01_000005/mrjob.tar.gz:\n"),
        ("+ PYTHONPATH=/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache"
         "/hadoop/appcache/application_1453488173054_0002"
         "/container_1453488173054_0002_01_000005/mrjob.tar.gz:\n"),
        "+ rm /\n",
        "rm: cannot remove ‘/’: Is a directory\n",
    ]

    # only the last command (and its output) makes up the error
    expected = dict(
        message="+ rm /\nrm: cannot remove ‘/’: Is a directory",
        start_line=5,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(stderr_lines), expected)
def _task_log_interpretation(self, application_id, step_type, partial=True):
    """Helper for :py:meth:`_interpret_task_logs`"""
    # not bothering with _read_logs() since this is a helper method
    result = {}

    for container_id in self._failed_task_container_ids(application_id):
        syslog_records = self._task_syslog_records(
            application_id, container_id, step_type)
        error = _parse_task_syslog_records(syslog_records)

        if not error.get('hadoop_error'):
            # not sure if this ever happens, since we already know
            # which containers failed
            continue

        error['container_id'] = container_id

        # fix weird munging of java stacktrace
        error['hadoop_error']['message'] = _fix_java_stack_trace(
            error['hadoop_error']['message'])

        stderr_lines = self._task_stderr_lines(
            application_id, container_id, step_type)
        task_error = _parse_task_stderr(stderr_lines)

        if task_error:
            task_error['message'] = _fix_traceback(task_error['message'])
            error['task_error'] = task_error

        result.setdefault('errors', []).append(error)

        # if partial is true, bail out when we find the first task error
        if task_error and partial:
            result['partial'] = True
            return result

    return result
def _task_log_interpretation(
        self, application_id, step_type, partial=True):
    """Helper for :py:meth:`_interpret_task_logs`"""
    # not bothering with _read_logs() since this is a helper method
    interpretation = {}

    for container_id in self._failed_task_container_ids(application_id):
        records = self._task_syslog_records(
            application_id, container_id, step_type)
        error = _parse_task_syslog_records(records)

        # not sure if this ever happens, since we already know
        # which containers failed
        if not error.get('hadoop_error'):
            continue

        error['container_id'] = container_id

        # fix weird munging of java stacktrace
        error['hadoop_error']['message'] = _fix_java_stack_trace(
            error['hadoop_error']['message'])

        task_error = _parse_task_stderr(self._task_stderr_lines(
            application_id, container_id, step_type))

        if task_error:
            task_error['message'] = _fix_traceback(task_error['message'])
            error['task_error'] = task_error

        interpretation.setdefault('errors', []).append(error)

        # if partial is true, bail out when we find the first task error
        if task_error and partial:
            interpretation['partial'] = True
            break

    return interpretation
def test_setup_command_error(self):
    """Earlier successful setup commands are skipped in the error."""
    err_lines = [
        ('+ __mrjob_PWD=/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache'
         '/hadoop/appcache/application_1453488173054_0002'
         '/container_1453488173054_0002_01_000005\n'),
        '+ exec\n',
        "+ python3 -c 'import fcntl; fcntl.flock(9, fcntl.LOCK_EX)\n",
        ('+ export PYTHONPATH=/mnt/var/lib/hadoop/tmp/nm-local-dir'
         '/usercache/hadoop/appcache/application_1453488173054_0002'
         '/container_1453488173054_0002_01_000005/mrjob.zip:\n'),
        ('+ PYTHONPATH=/mnt/var/lib/hadoop/tmp/nm-local-dir/usercache'
         '/hadoop/appcache/application_1453488173054_0002'
         '/container_1453488173054_0002_01_000005/mrjob.zip:\n'),
        '+ rm /\n',
        'rm: cannot remove ‘/’: Is a directory\n',
    ]

    # the error starts at the failing command, line index 5
    want = dict(
        message='+ rm /\nrm: cannot remove ‘/’: Is a directory',
        start_line=5,
        num_lines=2,
    )

    self.assertEqual(_parse_task_stderr(err_lines), want)
def test_error_without_leading_plus(self):
    """A lone ERROR: line with no follow-up isn't reported."""
    err_lines = ["ERROR: something is terribly, terribly wrong\n"]

    self.assertIsNone(_parse_task_stderr(err_lines))
def test_silent_bad_actor(self):
    """A silently failing command is reported by its command line alone."""
    want = dict(
        message="+ false",
        start_line=0,
        num_lines=1,
    )

    self.assertEqual(_parse_task_stderr(["+ false\n"]), want)
def test_empty(self):
    """No stderr lines at all means no error to report."""
    self.assertIsNone(_parse_task_stderr([]))
def test_error_without_leading_plus(self):
    """A single ERROR: line by itself doesn't count as a task error."""
    stderr_lines = [
        'ERROR: something is terribly, terribly wrong\n',
    ]

    self.assertIsNone(_parse_task_stderr(stderr_lines))