def test_task_log_error_no_line_nums(self):
    """Formatting of a task-log cause whose input split has no line numbers.

    With start_line/num_lines set to None, the message should name the
    input URI but omit the "lines M-N of" detail, and the Python
    traceback from stderr should be appended after the Java stack trace.
    """
    cause = {
        'type': 'task',
        'syslog': {
            'path': self.SYSLOG_PATH,
            'split': {
                'start_line': None,
                'num_lines': None,
                'path': self.INPUT_URI,
            },
            'error': {
                'stack_trace': self.JAVA_STACK_TRACE,
                'exception': self.JAVA_EXCEPTION,
            },
        },
        'stderr': {
            'path': self.STDERR_PATH,
            'error': {
                'exception': self.PYTHON_EXCEPTION,
                'traceback': self.PYTHON_TRACEBACK,
            },
        },
    }

    # build the expected message piece by piece, in display order
    expected = [
        "Probable cause of failure (from " + self.SYSLOG_PATH + "):",
        "",
        self.JAVA_EXCEPTION,
    ]
    expected += self.JAVA_STACK_TRACE
    expected += [
        "",
        "caused by Python exception (from " + self.STDERR_PATH + "):",
        "",
    ]
    expected += self.PYTHON_TRACEBACK
    expected += [
        self.PYTHON_EXCEPTION,
        "",
        "while reading input from " + self.INPUT_URI,
    ]

    self.assertEqual(_format_cause_of_failure(cause), expected)
def test_task_log_error_no_traceback(self):
    """Formatting of a task-log cause when stderr yielded no Python error.

    With line numbers present, the message should say
    "lines 1-336" (1-indexed, inclusive, from start_line=0 and
    num_lines=335) and merely point at the stderr path rather than
    embedding a traceback.
    """
    cause = {
        'type': 'task',
        'syslog': {
            'path': self.SYSLOG_PATH,
            'split': {
                'start_line': 0,
                'num_lines': 335,
                'path': self.INPUT_URI,
            },
            'error': {
                'stack_trace': self.JAVA_STACK_TRACE,
                'exception': self.JAVA_EXCEPTION,
            },
        },
        'stderr': {'path': self.STDERR_PATH, 'error': None},
    }

    expected = [
        "Probable cause of failure (from " + self.SYSLOG_PATH + "):",
        "",
        self.JAVA_EXCEPTION,
    ]
    expected += self.JAVA_STACK_TRACE
    expected += [
        "",
        "while reading input from lines 1-336 of " + self.INPUT_URI,
        "",
        "(see " + self.STDERR_PATH + " for task stderr)",
    ]

    self.assertEqual(_format_cause_of_failure(cause), expected)
def _run_job_in_hadoop(self):
    """Run each step of the job by invoking Hadoop streaming directly.

    For every step: build the command line, launch it (through a PTY
    when one is available, otherwise via ``Popen``), parse Hadoop's
    stderr into ``step_info``, record and print counters, and raise
    ``Exception`` with a formatted probable cause if the step's return
    code is nonzero.
    """
    for step_num in range(self._num_steps()):
        step_args = self._args_for_step(step_num)

        # log this *after* _args_for_step(), which can start a search
        # for the Hadoop streaming jar
        log.info('Running step %d of %d' %
                 (step_num + 1, self._num_steps()))
        log.debug('> %s' % cmd_line(step_args))

        # try to use a PTY if it's available: a PTY makes Hadoop's
        # output line-buffered, so the user sees progress in real time
        try:
            pid, master_fd = pty.fork()
        except (AttributeError, OSError):
            # no PTYs, just use Popen
            # (AttributeError: the pty module may be missing fork() on
            # some platforms; OSError: fork itself can fail)

            # user won't get much feedback for a while, so tell them
            # Hadoop is running
            log.debug('No PTY available, using Popen() to invoke Hadoop')

            step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)

            # parse Hadoop's progress/error reporting from stderr
            step_info = _process_stderr_from_streaming(step_proc.stderr)

            # there shouldn't be much output to STDOUT
            for line in step_proc.stdout:
                _log_line_from_hadoop(to_string(line).strip('\r\n'))

            # close pipes before wait() so the child can't block on a
            # full pipe buffer
            step_proc.stdout.close()
            step_proc.stderr.close()

            returncode = step_proc.wait()
        else:
            # we have PTYs
            if pid == 0:
                # we are the child process: replace ourselves with Hadoop.
                # NOTE(review): if execvp() fails, the child would fall
                # through into the parent's loop — presumably execvp is
                # expected to succeed here; confirm.
                os.execvp(step_args[0], step_args)
            else:
                # we are the parent; read the child's merged output
                log.debug('Invoking Hadoop via PTY')

                with os.fdopen(master_fd, 'rb') as master:
                    # reading from master gives us the subprocess's
                    # stderr and stdout (it's a fake terminal)
                    step_info = _process_stderr_from_streaming(
                        _wrap_streaming_pty_output(master))
                    _, returncode = os.waitpid(pid, 0)

        # make sure output_dir is filled (Hadoop may not have reported
        # it on stderr)
        if not step_info['output_dir']:
            step_info['output_dir'] = self._hdfs_step_output_dir(step_num)

        if not step_info['counters']:
            pass  # TODO: fetch counters; see _fetch_counters()

        self._steps_info.append(step_info)

        # just print counters for this one step
        self._print_counters(step_nums=[step_num])

        if returncode:
            err_lines = [
                'Job failed with return code %d: %s' %
                (returncode, cmd_line(step_args))
            ]

            # scan logs for the likely reason the step died
            cause = self._find_probable_cause_of_failure(**step_info)
            if cause:
                err_lines.append('')  # pad with empty line
                err_lines.extend(_format_cause_of_failure(cause))

            for err_line in err_lines:
                log.error(err_line)
            raise Exception('\n'.join(err_lines) + '\n')
def _run_job_in_hadoop(self):
    """Run each step of the job by invoking Hadoop streaming directly.

    For every step: build the command line, launch it (through a PTY
    when one is available, otherwise via ``Popen``), parse Hadoop's
    stderr into ``step_info``, fall back to the history log for
    counters, record and print counters, and raise ``Exception`` with a
    formatted probable cause if the step's return code is nonzero.
    """
    for step_num in range(self._num_steps()):
        step_args = self._args_for_step(step_num)

        # log this *after* _args_for_step(), which can start a search
        # for the Hadoop streaming jar
        log.info('Running step %d of %d' %
                 (step_num + 1, self._num_steps()))
        log.debug('> %s' % cmd_line(step_args))

        # try to use a PTY if it's available: a PTY makes Hadoop's
        # output line-buffered, so the user sees progress in real time
        try:
            pid, master_fd = pty.fork()
        except (AttributeError, OSError):
            # no PTYs, just use Popen
            # (AttributeError: the pty module may be missing fork() on
            # some platforms; OSError: fork itself can fail)

            # user won't get much feedback for a while, so tell them
            # Hadoop is running
            log.debug('No PTY available, using Popen() to invoke Hadoop')

            step_proc = Popen(step_args, stdout=PIPE, stderr=PIPE)

            # parse Hadoop's progress/error reporting from stderr
            step_info = _process_stderr_from_streaming(
                step_proc.stderr)

            # there shouldn't be much output to STDOUT
            for line in step_proc.stdout:
                _log_line_from_hadoop(to_string(line).strip('\r\n'))

            # close pipes before wait() so the child can't block on a
            # full pipe buffer
            step_proc.stdout.close()
            step_proc.stderr.close()

            returncode = step_proc.wait()
        else:
            # we have PTYs
            if pid == 0:
                # we are the child process: replace ourselves with Hadoop.
                # NOTE(review): if execvp() fails, the child would fall
                # through into the parent's loop — presumably execvp is
                # expected to succeed here; confirm.
                os.execvp(step_args[0], step_args)
            else:
                # we are the parent; read the child's merged output
                log.debug('Invoking Hadoop via PTY')

                with os.fdopen(master_fd, 'rb') as master:
                    # reading from master gives us the subprocess's
                    # stderr and stdout (it's a fake terminal)
                    step_info = _process_stderr_from_streaming(
                        _wrap_streaming_pty_output(master))
                    _, returncode = os.waitpid(pid, 0)

        # make sure output_dir is filled (Hadoop may not have reported
        # it on stderr)
        if not step_info['output_dir']:
            step_info['output_dir'] = self._hdfs_step_output_dir(step_num)

        if not step_info['counters']:
            # stderr parsing found no counters; the job history log is
            # the fallback source
            log.info('Attempting to read counters from history log')
            history = self._interpret_history_log(step_info)
            if history:
                step_info['counters'] = history['counters']

        self._steps_info.append(step_info)

        # just print counters for this one step
        self._print_counters(step_nums=[step_num])

        if returncode:
            err_lines = [
                'Job failed with return code %d: %s' %
                (returncode, cmd_line(step_args))]

            # scan logs for the likely reason the step died
            cause = self._find_probable_cause_of_failure(**step_info)
            if cause:
                err_lines.append('')  # pad with empty line
                err_lines.extend(_format_cause_of_failure(cause))

            for err_line in err_lines:
                log.error(err_line)
            raise Exception('\n'.join(err_lines) + '\n')
def test_empty(self):
    """A missing cause should fall back to a generic one-line message."""
    result = _format_cause_of_failure(None)
    self.assertEqual(result, ['Probable cause of failure: None'])