def test_find_input_uri_for_mapper(self):
    """Check the URI extracted from NativeS3FileSystem "Opening" lines.

    The input is one junk line followed by two "Opening '<uri>' for
    reading" lines; the URI from the second of those is the expected
    result. The lines are handed over as a generator, not a list.
    """
    earlier_uri = 's3://yourbucket/logs/2010/07/23/log2-00077.gz'
    expected_uri = 's3://yourbucket/logs/2010/07/23/log2-00078.gz'

    # Shared shape of the two syslog lines; only the URI differs.
    opening = ("2010-07-27 17:54:54,344 INFO "
               "org.apache.hadoop.fs.s3native.NativeS3FileSystem (main): "
               "Opening '%s' for reading\n")

    log_lines = [
        'garbage\n',
        opening % earlier_uri,
        opening % expected_uri,
    ]

    found = find_input_uri_for_mapper(line for line in log_lines)
    self.assertEqual(found, expected_uri)
def _scan_for_input_uri(log_file_uri, runner):
    """Look for input-file information in the syslog beside log_file_uri.

    The syslog is taken to be the file named ``syslog`` in the same
    directory as *log_file_uri* and is read via ``runner.cat()``.

    Returns the result of :py:func:`find_input_uri_for_mapper` on the
    syslog's lines, or ``None`` if ``runner.cat()`` gave back something
    falsy.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    log_dir = posixpath.dirname(log_file_uri)
    syslog_path = posixpath.join(log_dir, 'syslog')
    lines = runner.cat(syslog_path)

    # Nothing to scan: return before logging or parsing anything.
    if not lines:
        return None

    log.debug('scanning %s for input URI' % syslog_path)
    return find_input_uri_for_mapper(lines)
def _scan_for_input_uri(log_file_uri, runner):
    """Scan the sibling syslog of log_file_uri for the mapper's input URI.

    ``syslog`` is looked up in the directory that contains
    *log_file_uri* and fetched with ``runner.cat()``. When that call
    returns a truthy value, it is passed to
    :py:func:`find_input_uri_for_mapper` and that function's result is
    returned; otherwise the result is ``None``.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    parent_uri = posixpath.dirname(log_file_uri)
    target_uri = posixpath.join(parent_uri, 'syslog')
    contents = runner.cat(target_uri)

    input_uri = None
    if contents:
        log.debug('scanning %s for input URI' % target_uri)
        input_uri = find_input_uri_for_mapper(contents)
    return input_uri
def _scan_for_input_uri(log_file_uri, runner):
    """Find input-file information in the syslog matching log_file_uri.

    The syslog lives in the same directory as *log_file_uri*. If
    *log_file_uri* ends in ``.gz``, the syslog name gets the same
    ``.gz`` suffix. The file is read with ``runner.cat()``.

    Returns what :py:func:`find_input_uri_for_mapper` returns for the
    syslog lines, or ``None`` when ``runner.cat()`` returns something
    falsy.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    # TODO: verify that this works on 3.x AMIs
    syslog_name = "syslog"
    if log_file_uri.endswith(".gz"):
        # Mirror the compression suffix of the log we were given.
        syslog_name += ".gz"

    syslog_location = posixpath.join(
        posixpath.dirname(log_file_uri), syslog_name)

    lines = runner.cat(syslog_location)
    if not lines:
        return None

    log.debug("scanning %s for input URI" % syslog_location)
    return find_input_uri_for_mapper(lines)
def _scan_for_input_uri(log_file_uri, fs):
    """Read the syslog that accompanies log_file_uri and extract the
    input file's URI from it.

    The syslog path is ``syslog`` in *log_file_uri*'s directory, with
    ``.gz`` appended when *log_file_uri* itself ends in ``.gz``. It is
    read through ``fs.cat()``.

    Returns the value of :py:func:`find_input_uri_for_mapper` for the
    lines read, or ``None`` if ``fs.cat()`` returned something falsy.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    # TODO: verify that this works on 3.x AMIs
    gz_suffix = '.gz' if log_file_uri.endswith('.gz') else ''
    containing_dir = posixpath.dirname(log_file_uri)
    syslog_path = posixpath.join(containing_dir, 'syslog') + gz_suffix

    lines = fs.cat(syslog_path)
    if not lines:
        # No syslog content available; nothing to report.
        return None

    log.debug('scanning %s for input URI' % syslog_path)
    return find_input_uri_for_mapper(lines)
def test_empty(self):
    """Each log-scanning function returns None for an empty list of lines."""
    finders = (
        find_input_uri_for_mapper,
        find_hadoop_java_stack_trace,
        find_interesting_hadoop_streaming_error,
    )
    for finder in finders:
        # A fresh empty list per call, as no input lines at all.
        self.assertEqual(finder([]), None)