Example #1
0
 def test_find_input_uri_for_mapper(self):
     # One junk line followed by two "Opening ... for reading" lines;
     # the expected result is the URI from the second of those lines.
     expected_uri = 's3://yourbucket/logs/2010/07/23/log2-00078.gz'
     log_lines = [
         'garbage\n',
         "2010-07-27 17:54:54,344 INFO org.apache.hadoop.fs.s3native.NativeS3FileSystem (main): Opening 's3://yourbucket/logs/2010/07/23/log2-00077.gz' for reading\n",
         "2010-07-27 17:54:54,344 INFO org.apache.hadoop.fs.s3native.NativeS3FileSystem (main): Opening 's3://yourbucket/logs/2010/07/23/log2-00078.gz' for reading\n",
     ]
     # feed the lines through a generator rather than the list itself
     line_stream = (entry for entry in log_lines)
     self.assertEqual(find_input_uri_for_mapper(line_stream), expected_uri)
Example #2
0
 def test_find_input_uri_for_mapper(self):
     # Sample syslog content: a non-matching line, then two lines that
     # each report an S3 file being opened for reading.
     syslog_sample = [
         'garbage\n',
         "2010-07-27 17:54:54,344 INFO org.apache.hadoop.fs.s3native.NativeS3FileSystem (main): Opening 's3://yourbucket/logs/2010/07/23/log2-00077.gz' for reading\n",
         "2010-07-27 17:54:54,344 INFO org.apache.hadoop.fs.s3native.NativeS3FileSystem (main): Opening 's3://yourbucket/logs/2010/07/23/log2-00078.gz' for reading\n",
     ]
     # the lines are passed as a generator, not as the list
     found_uri = find_input_uri_for_mapper(
         text for text in syslog_sample)
     # the URI from the later "Opening" line is the expected answer
     self.assertEqual(
         found_uri, 's3://yourbucket/logs/2010/07/23/log2-00078.gz')
Example #3
0
def _scan_for_input_uri(log_file_uri, runner):
    """Look for the input URI in the syslog that sits next to a log file.

    The syslog URI is built by replacing the last path component of
    *log_file_uri* with ``syslog``. Its contents are fetched with
    ``runner.cat()``; if that yields something truthy, the lines are
    handed to :py:func:`find_input_uri_for_mapper` and its result is
    returned. Otherwise ``None`` is returned.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    log_dir = posixpath.dirname(log_file_uri)
    syslog_uri = posixpath.join(log_dir, 'syslog')

    lines = runner.cat(syslog_uri)
    if not lines:
        # nothing to scan
        return None

    log.debug('scanning %s for input URI' % syslog_uri)
    return find_input_uri_for_mapper(lines)
Example #4
0
def _scan_for_input_uri(log_file_uri, runner):
    """Find the input file URI by reading the neighboring syslog.

    ``syslog`` is looked up in the same directory as *log_file_uri* and
    read through ``runner.cat()``. A falsy result from ``cat()`` gives
    ``None``; anything else is passed on to
    :py:func:`find_input_uri_for_mapper`, whose return value is returned
    as-is.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    parent_uri = posixpath.dirname(log_file_uri)
    syslog_uri = posixpath.join(parent_uri, 'syslog')
    contents = runner.cat(syslog_uri)

    if not contents:
        return None

    # only log once we know there is something to scan
    log.debug('scanning %s for input URI' % syslog_uri)
    return find_input_uri_for_mapper(contents)
Example #5
0
def _scan_for_input_uri(log_file_uri, runner):
    """Look for the input URI in the syslog that sits next to a log file.

    The syslog is expected in the same directory as *log_file_uri*; when
    *log_file_uri* ends in ``.gz`` the syslog name gets a ``.gz`` suffix
    too. The file is read with ``runner.cat()``. If that returns
    something truthy the lines go to
    :py:func:`find_input_uri_for_mapper`; otherwise the result is
    ``None``.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    # TODO: verify that this works on 3.x AMIs
    # pick the syslog file name up front, mirroring the log's compression
    syslog_name = "syslog.gz" if log_file_uri.endswith(".gz") else "syslog"
    syslog_uri = posixpath.join(posixpath.dirname(log_file_uri), syslog_name)

    lines = runner.cat(syslog_uri)
    if not lines:
        return None

    log.debug("scanning %s for input URI" % syslog_uri)
    return find_input_uri_for_mapper(lines)
Example #6
0
def _scan_for_input_uri(log_file_uri, fs):
    """Find the input file URI by reading the neighboring syslog.

    The syslog URI is *log_file_uri* with its last path component
    replaced by ``syslog`` (``syslog.gz`` if *log_file_uri* itself ends
    in ``.gz``). It is read via ``fs.cat()``. A falsy result gives
    ``None``; otherwise the lines are passed to
    :py:func:`find_input_uri_for_mapper` and its result is returned.

    Helper function for :py:func:`scan_task_attempt_logs()`
    """
    # TODO: verify that this works on 3.x AMIs
    suffix = '.gz' if log_file_uri.endswith('.gz') else ''
    parent_uri = posixpath.dirname(log_file_uri)
    syslog_uri = posixpath.join(parent_uri, 'syslog') + suffix

    contents = fs.cat(syslog_uri)
    if not contents:
        # empty or missing syslog: nothing to report
        return None

    log.debug('scanning %s for input URI' % syslog_uri)
    return find_input_uri_for_mapper(contents)
Example #7
0
 def test_empty(self):
     # every log parser is expected to give None for empty input
     parsers = (
         find_input_uri_for_mapper,
         find_hadoop_java_stack_trace,
         find_interesting_hadoop_streaming_error,
     )
     for parse in parsers:
         self.assertEqual(parse([]), None)
Example #8
0
 def test_empty(self):
     # with no log lines to read, each finder should come back with None
     uri = find_input_uri_for_mapper([])
     self.assertEqual(uri, None)

     stack_trace = find_hadoop_java_stack_trace([])
     self.assertEqual(stack_trace, None)

     streaming_error = find_interesting_hadoop_streaming_error([])
     self.assertEqual(streaming_error, None)