def test_syslog_with_corresponding_stderr(self): syslog_path = "/userlogs/attempt_201512232143_0008_m_000001_3/syslog" stderr_path = "/userlogs/attempt_201512232143_0008_m_000001_3/stderr" mock_stderr_callback = Mock() self.mock_paths = [syslog_path, stderr_path] self.path_to_mock_result = { syslog_path: dict(hadoop_error=dict(message="BOOM")), stderr_path: dict(message="because, exploding code"), } self.assertEqual( self.interpret_task_logs(stderr_callback=mock_stderr_callback), dict( errors=[ dict( attempt_id="attempt_201512232143_0008_m_000001_3", hadoop_error=dict(message="BOOM", path=syslog_path), task_error=dict(message="because, exploding code", path=stderr_path), task_id="task_201512232143_0008_m_000001", ) ], partial=True, ), ) mock_stderr_callback.assert_called_once_with(stderr_path)
def test_syslog_with_corresponding_stderr(self):
    syslog_path = '/userlogs/attempt_201512232143_0008_m_000001_3/syslog'
    stderr_path = '/userlogs/attempt_201512232143_0008_m_000001_3/stderr'

    mock_stderr_callback = Mock()

    self.mock_paths = [syslog_path, stderr_path]
    self.path_to_mock_result = {
        syslog_path: dict(hadoop_error=dict(message='BOOM')),
        stderr_path: dict(message='because, exploding code'),
    }

    self.assertEqual(
        self.interpret_task_logs(stderr_callback=mock_stderr_callback),
        dict(
            errors=[
                dict(
                    attempt_id='attempt_201512232143_0008_m_000001_3',
                    hadoop_error=dict(
                        message='BOOM',
                        path=syslog_path,
                    ),
                    task_error=dict(
                        message='because, exploding code',
                        path=stderr_path,
                    ),
                    task_id='task_201512232143_0008_m_000001',
                ),
            ],
            partial=True,
        ))

    mock_stderr_callback.assert_called_once_with(stderr_path)
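
# Note how the expected task_id above is derived from the attempt_id: the
# 'attempt_' prefix becomes 'task_' and the trailing attempt number is
# dropped. A hypothetical one-liner for that mapping (illustrative only,
# not mrjob's actual helper):

def _sketch_attempt_to_task_id(attempt_id):
    # 'attempt_201512232143_0008_m_000001_3'
    #     -> 'task_201512232143_0008_m_000001'
    return 'task_' + attempt_id[len('attempt_'):].rsplit('_', 1)[0]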
def test_try_till_success(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=[IOError, IOError, None])

    a = RetryWrapper(a1, retry_if=lambda x: True, backoff=0.0001,
                     max_tries=0)

    a.f()
    self.assertEqual(a1.f.call_count, 3)
def test_wrapping(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=IOError)
    a2 = Mock()
    a2.f = Mock(__name__='f', return_value=2)

    a = RetryGoRound([a1, a2], lambda ex: isinstance(ex, IOError))

    self.assertEqual(a.f('foo', bar='baz'), 2)
    a1.f.assert_called_once_with('foo', bar='baz')
    a2.f.assert_called_once_with('foo', bar='baz')
    self.assertEqual(a.f.__name__, 'f')
def test_success(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=None)

    a = RetryWrapper(a1, retry_if=lambda x: True, backoff=0.0001,
                     max_tries=2)

    a.f()
    a1.f.assert_called_once_with()
def test_failure(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=[IOError, 1])

    a = RetryWrapper(a1, retry_if=lambda x: True, backoff=0.0001,
                     max_tries=2)

    self.assertEqual(a.f(), 1)
    self.assertEqual(a1.f.call_count, 2)
def test_failure_raises_if_all_tries_fail(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=[IOError, IOError])

    a = RetryWrapper(a1, retry_if=lambda x: True, backoff=0.0001,
                     max_tries=2)

    with self.assertRaises(IOError):
        a.f()
    self.assertEqual(a1.f.call_count, 2)
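
# A minimal sketch of the retry semantics the RetryWrapper tests above pin
# down (an illustration under stated assumptions, not mrjob's actual
# implementation; the class name and fixed backoff are invented):

import time
from functools import wraps


class _SketchRetryWrapper(object):

    def __init__(self, wrapped, retry_if, backoff, max_tries):
        self._wrapped = wrapped
        self._retry_if = retry_if    # is this exception worth retrying?
        self._backoff = backoff      # seconds to sleep between tries
        self._max_tries = max_tries  # 0 appears to mean "try forever"

    def __getattr__(self, name):
        f = getattr(self._wrapped, name)

        @wraps(f)  # this is why the mocks above set __name__='f'
        def retrying(*args, **kwargs):
            tries = 0
            while True:
                tries += 1
                try:
                    return f(*args, **kwargs)
                except Exception as ex:
                    out_of_tries = (self._max_tries and
                                    tries >= self._max_tries)
                    if not self._retry_if(ex) or out_of_tries:
                        raise
                    time.sleep(self._backoff)

        return retrying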
def test_can_wrap_around(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=[IOError, 1])
    a2 = Mock()
    a2.f = Mock(__name__='f', side_effect=[2, IOError])

    a = RetryGoRound([a1, a2], lambda ex: isinstance(ex, IOError))

    self.assertEqual(a.f(), 2)
    self.assertEqual(a.f(), 1)

    self.assertEqual(a1.f.call_count, 2)
    self.assertEqual(a2.f.call_count, 2)
def test_success(self):
    a1 = Mock()
    # need __name__ so wraps() will work
    a1.f = Mock(__name__='f', return_value=1)
    a2 = Mock()
    a2.f = Mock(__name__='f', return_value=2)

    a = RetryGoRound([a1, a2], lambda ex: isinstance(ex, IOError))

    self.assertEqual(a.f(), 1)
    self.assertEqual(a1.f.call_count, 1)
    # never needed to try a2.f()
    self.assertEqual(a2.f.call_count, 0)
def test_unrecoverable_error(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=ValueError)
    a2 = Mock()
    a2.f = Mock(__name__='f', return_value=2)

    a = RetryGoRound([a1, a2], lambda ex: isinstance(ex, IOError))

    self.assertRaises(ValueError, a.f)
    self.assertRaises(ValueError, a.f)

    self.assertEqual(a1.f.call_count, 2)
    self.assertEqual(a2.f.call_count, 0)
def test_python3_jupyter_notebook(self):
    # regression test for #1441
    # this actually works on any Python platform, since we use mocks
    mock_stdin = Mock()
    mock_stdin.buffer = Mock()
    mock_stdout = Mock()
    del mock_stdout.buffer
    mock_stderr = Mock()
    del mock_stderr.buffer

    with patch.multiple(sys, stdin=mock_stdin, stdout=mock_stdout,
                        stderr=mock_stderr):
        launcher = MRJobLauncher(args=['/path/to/script'])

        self.assertEqual(launcher.stdin, mock_stdin.buffer)
        self.assertEqual(launcher.stdout, mock_stdout)
        self.assertEqual(launcher.stderr, mock_stderr)
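
# The regression test above pins down the stream-selection rule: prefer a
# stream's underlying binary .buffer when it exists (normal Python 3), and
# fall back to the stream itself when it doesn't (e.g. the stdout/stderr
# wrappers Jupyter installs). A minimal sketch of that rule
# (_to_binary_stream is a hypothetical helper, not mrjob's actual code):

def _to_binary_stream(stream):
    # io.TextIOWrapper exposes raw bytes via .buffer; notebook streams
    # may not have that attribute
    return getattr(stream, 'buffer', stream)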
def test_one_failure(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=IOError)
    a1.x = 100
    a2 = Mock()
    a2.f = Mock(__name__='f', return_value=2)
    a2.x = 200

    a = RetryGoRound([a1, a2], lambda ex: isinstance(ex, IOError))

    self.assertEqual(a.x, 100)
    self.assertEqual(a.f(), 2)
    # a2 was the last alternative that worked, so now we get x from it
    self.assertEqual(a.x, 200)

    # this time we should skip calling a1.f() entirely
    self.assertEqual(a.f(), 2)
    self.assertEqual(a1.f.call_count, 1)
    self.assertEqual(a2.f.call_count, 2)
def test_all_fail(self):
    a1 = Mock()
    a1.f = Mock(__name__='f', side_effect=IOError)
    a1.x = 100
    a2 = Mock()
    a2.f = Mock(__name__='f', side_effect=IOError)
    a2.x = 200

    a = RetryGoRound([a1, a2], lambda ex: isinstance(ex, IOError))

    self.assertEqual(a.x, 100)
    # ran out of alternatives
    self.assertRaises(IOError, a.f)
    # nothing worked, so we're still pointing at a1
    self.assertEqual(a.x, 100)

    # yep, still broken
    self.assertRaises(IOError, a.f)

    self.assertEqual(a1.f.call_count, 2)
    self.assertEqual(a2.f.call_count, 2)
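
# A minimal sketch of the go-round semantics the RetryGoRound tests above
# rely on (illustrative only; the class name is invented and mrjob's
# actual implementation may differ):

from functools import wraps


class _SketchRetryGoRound(object):

    def __init__(self, alternatives, recoverable):
        self._alternatives = alternatives
        self._recoverable = recoverable  # worth trying the next one?
        self._i = 0  # index of the alternative that last worked

    def __getattr__(self, name):
        value = getattr(self._alternatives[self._i], name)
        if not callable(value):
            return value  # plain attributes come from the current choice

        @wraps(value)  # needs value.__name__, hence __name__='f' above
        def go_round(*args, **kwargs):
            # try each alternative at most once per call, starting from
            # the current one and wrapping around the list
            num = len(self._alternatives)
            for offset in range(num):
                i = (self._i + offset) % num
                f = getattr(self._alternatives[i], name)
                try:
                    result = f(*args, **kwargs)
                    self._i = i  # remember what worked
                    return result
                except Exception as ex:
                    if not self._recoverable(ex) or offset == num - 1:
                        raise

        return go_round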
def assert_bucket_validation(self, boto_version, should_validate):
    with patch('boto.Version', boto_version):
        s3_conn = Mock()

        _get_bucket(s3_conn, 'walrus')

        s3_conn.get_bucket.assert_called_once_with(
            'walrus', validate=should_validate)
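
# A sketch of the behavior assert_bucket_validation() presumably checks:
# whether _get_bucket() passes validate=True depends on the installed boto
# version. The version parsing and the 2.25 cutoff below are assumptions
# for illustration, not mrjob's actual logic:

import boto


def _sketch_get_bucket(s3_conn, bucket_name):
    # boto.Version is a string like '2.38.0'
    major_minor = tuple(int(x) for x in boto.Version.split('.')[:2])
    return s3_conn.get_bucket(bucket_name,
                              validate=(major_minor >= (2, 25)))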
def setUp(self):
    super(InterpretStepLogTestCase, self).setUp()

    self.runner._get_step_log_interpretation = Mock()
def setUp(self):
    super(InterpretHistoryLogTestCase, self).setUp()

    self.runner._ls_history_logs = Mock()
    self._interpret_history_log = self.start(
        patch('mrjob.logs.mixin._interpret_history_log'))
class InterpretSparkTaskLogsTestCase(PatcherTestCase):

    maxDiff = None

    def setUp(self):
        super(InterpretSparkTaskLogsTestCase, self).setUp()

        # instead of mocking out contents of files, just mock out
        # what _parse_task_{syslog,stderr}() should return, and have
        # _cat_log() just pass through the path
        self.mock_paths = []
        self.path_to_mock_result = {}
        self.mock_log_callback = Mock()

        def mock_cat_log(fs, path):
            if path in self.mock_paths:
                return path

        # (the actual log-parsing functions take lines from the log)
        def mock_parse_task_syslog(path_from_mock_cat_log):
            # default is {}
            return self.path_to_mock_result.get(path_from_mock_cat_log, {})

        def mock_parse_task_stderr(path_from_mock_cat_log):
            # default is None
            return self.path_to_mock_result.get(path_from_mock_cat_log)

        def mock_exists(path):
            return path in self.mock_paths or path == 'MOCK_LOG_DIR'

        # need to mock ls so that _ls_spark_task_logs() can work
        def mock_ls(log_dir):
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log = self.start(
            patch('mrjob.logs.task._cat_log', side_effect=mock_cat_log))

        self.start(patch('mrjob.logs.task._parse_task_syslog',
                         side_effect=mock_parse_task_syslog))
        self.start(patch('mrjob.logs.task._parse_task_stderr',
                         side_effect=mock_parse_task_stderr))

    def mock_path_matches(self):
        mock_log_dir_stream = [['MOCK_LOG_DIR']]  # _ls_logs() needs this
        return _ls_spark_task_logs(self.mock_fs, mock_log_dir_stream)

    def interpret_spark_task_logs(self, **kwargs):
        return _interpret_spark_task_logs(
            self.mock_fs, self.mock_path_matches(),
            log_callback=self.mock_log_callback, **kwargs)

    def test_empty(self):
        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_no_error(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_split_only(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(split=dict(path='best_input_file_ever'))
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_executor_error(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='BOOM',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

    def test_stderr_with_executor_error_and_split(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM'),
                              split=dict(path='best_input_file_ever')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='BOOM',
                            path=stderr_path,
                        ),
                        split=dict(path='best_input_file_ever'),
                    ),
                ],
                partial=True,
            ))

    def test_stderr_with_application_exited_and_stdout(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]
        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                        task_error=dict(
                            message='because, exploding code',
                            path=stdout_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_empty_stdout(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]
        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_no_stdout(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path)])

    def test_error_in_stdout_only(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]
        self.path_to_mock_result = {
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path)])

    # indirectly tests _ls_spark_task_logs() and its ability to sort by
    # log type and recency
    def test_multiple_logs(self):
        stdout1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stdout')
        stderr1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stderr')
        stdout2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stdout')
        stderr2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stderr')
        stdout3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stdout')
        stderr3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stderr')
        stderr4_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [
            stdout1_path,
            stderr1_path,
            stdout2_path,
            stderr2_path,
            stdout3_path,
            stderr3_path,
            stderr4_path,
        ]

        self.path_to_mock_result = {
            stderr1_path: dict(hadoop_error=dict(message='BOOM1')),
            stderr2_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='exited with status 2')),
            stdout2_path: dict(message='BoomException'),
            stderr4_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='exited with status 4')),
            # no errors for stdout1_path, stdout3_path, or stderr3_path
        }

        # we should read from stderr4_path first (later task number)
        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='exited with status 4',
                            path=stderr4_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr4_path)])

        # try again, with partial=False
        self.mock_log_callback.reset_mock()

        # paths still get sorted by _ls_logs()
        self.assertEqual(
            self.interpret_spark_task_logs(partial=False),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='exited with status 4',
                            path=stderr4_path,
                        ),
                    ),
                    dict(
                        container_id='container_1450486922681_0005_01_000002',
                        hadoop_error=dict(
                            message='exited with status 2',
                            path=stderr2_path,
                        ),
                        task_error=dict(
                            message='BoomException',
                            path=stdout2_path,
                        ),
                    ),
                    dict(
                        container_id='container_1450486922681_0005_01_000001',
                        hadoop_error=dict(
                            message='BOOM1',
                            path=stderr1_path,
                        ),
                    ),
                ],
            ))

        self.assertEqual(self.mock_log_callback.call_args_list, [
            call(stderr4_path),
            call(stderr3_path),
            call(stderr2_path),
            call(stdout2_path),
            call(stderr1_path),
        ])
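
# The container_id values the tests above expect are recoverable from the
# log paths themselves. A hypothetical sketch of that extraction (mrjob's
# actual parsing lives in mrjob.logs.task and may differ):

import re

_CONTAINER_ID_RE = re.compile(r'/(container_\d+_\d+_\d+_\d+)/')


def _sketch_container_id(log_path):
    # e.g. '.../container_1450486922681_0005_01_000004/stderr'
    #     -> 'container_1450486922681_0005_01_000004'
    m = _CONTAINER_ID_RE.search(log_path)
    return m.group(1) if m else None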
def setUp(self):
    super(LsHistoryLogsTestCase, self).setUp()

    self._ls_history_logs = self.start(
        patch('mrjob.logs.mixin._ls_history_logs'))
    self.runner._stream_history_log_dirs = Mock()
class CompositeFilesystemTestCase(BasicTestCase):

    def setUp(self):
        super(CompositeFilesystemTestCase, self).setUp()

        self.log = self.start(patch('mrjob.fs.composite.log'))

        self.hadoop_fs = Mock(spec=Filesystem)
        self.hadoop_fs.get_hadoop_version = Mock()
        self.hadoop_fs.can_handle_path.side_effect = is_uri

        self.local_fs = Mock(spec=Filesystem)
        self.local_fs.can_handle_path.side_effect = lambda p: not is_uri(p)

        self.s3_fs = Mock(spec=Filesystem)
        self.s3_fs.create_bucket = Mock()
        self.s3_fs.can_handle_path.side_effect = is_s3_uri

    def test_empty_fs(self):
        fs = CompositeFilesystem()

        self.assertFalse(fs.can_handle_path('s3://walrus/fish'))
        self.assertFalse(fs.can_handle_path('/'))

        self.assertRaises(IOError, fs.ls, '/')

    def test_pick_fs(self):
        fs = CompositeFilesystem()
        fs.add_fs('s3', self.s3_fs)
        fs.add_fs('hadoop', self.hadoop_fs)

        self.assertEqual(fs.ls('s3://walrus/fish'),
                         self.s3_fs.ls.return_value)
        # hadoop fs could have handled it, but s3_fs got it first
        self.assertTrue(self.hadoop_fs.can_handle_path('s3://walrus/fish'))
        self.assertFalse(self.hadoop_fs.ls.called)

        self.assertEqual(fs.ls('hdfs:///user/hadoop/'),
                         self.hadoop_fs.ls.return_value)

        # don't move on to the next FS on an error (unlike old
        # CompositeFilesystem implementation)
        self.s3_fs.ls.side_effect = IOError
        self.assertRaises(IOError, fs.ls, 's3://walrus/fish')

    def test_forward_join(self):
        # join() is a special case since it takes multiple arguments
        fs = CompositeFilesystem()
        fs.add_fs('s3', self.s3_fs)

        self.assertEqual(fs.join('s3://walrus/fish', 'salmon'),
                         self.s3_fs.join.return_value)
        self.s3_fs.join.assert_called_once_with('s3://walrus/fish', 'salmon')

    def test_forward_put(self):
        # put() is a special case since the path that matters comes second
        fs = CompositeFilesystem()
        fs.add_fs('s3', self.s3_fs)

        fs.put('/path/to/file', 's3://walrus/file')
        self.s3_fs.put.assert_called_once_with(
            '/path/to/file', 's3://walrus/file')

    def test_forward_fs_extensions(self):
        fs = CompositeFilesystem()
        fs.add_fs('s3', self.s3_fs)
        fs.add_fs('hadoop', self.hadoop_fs)

        self.assertEqual(fs.create_bucket, self.s3_fs.create_bucket)
        self.assertEqual(fs.get_hadoop_version,
                         self.hadoop_fs.get_hadoop_version)

        self.assertRaises(AttributeError, lambda: fs.client)

    def test_disable_fs(self):
        class NoCredentialsError(Exception):
            pass

        fs = CompositeFilesystem()
        # tentatively use S3 filesystem, if set up
        fs.add_fs('s3', self.s3_fs,
                  disable_if=lambda ex: isinstance(ex, NoCredentialsError))
        fs.add_fs('hadoop', self.hadoop_fs)

        self.s3_fs.ls.side_effect = NoCredentialsError

        # calling ls() on S3 fs disables it, so we move on to hadoop fs
        self.assertEqual(fs.ls('s3://walrus/'),
                         self.hadoop_fs.ls.return_value)
        self.assertTrue(self.s3_fs.ls.called)

        self.assertIn('s3', fs._disabled)

        # now that s3 fs is disabled, we won't even try to call it
        self.assertEqual(fs.cat('s3://walrus/fish'),
                         self.hadoop_fs.cat.return_value)
        self.assertFalse(self.s3_fs.cat.called)
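
# A minimal sketch of the dispatch rules the CompositeFilesystem tests
# above pin down; only ls() is sketched, while the real class forwards
# every Filesystem method the same way (illustrative only; the class name
# is invented):

class _SketchCompositeFilesystem(object):

    def __init__(self):
        self._fs_list = []  # (name, fs, disable_if) in registration order
        self._disabled = set()

    def add_fs(self, name, fs, disable_if=None):
        self._fs_list.append((name, fs, disable_if))

    def can_handle_path(self, path):
        return any(fs.can_handle_path(path)
                   for name, fs, _ in self._fs_list
                   if name not in self._disabled)

    def ls(self, path):
        # dispatch to the first non-disabled fs that claims the path
        for name, fs, disable_if in self._fs_list:
            if name in self._disabled or not fs.can_handle_path(path):
                continue
            try:
                return fs.ls(path)
            except Exception as ex:
                # a matching disable_if permanently sidelines this fs and
                # falls through to the next one; any other error
                # propagates (no silent failover)
                if disable_if is not None and disable_if(ex):
                    self._disabled.add(name)
                    continue
                raise

        raise IOError('no filesystem can handle %s' % path)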
def create_channel(self, target, credentials=None):
    channel = Mock()
    channel._channel = Mock()
    channel._channel.target = Mock(return_value=target)

    return channel
class InterpretSparkTaskLogsTestCase(BasicTestCase):

    maxDiff = None

    def setUp(self):
        super(InterpretSparkTaskLogsTestCase, self).setUp()

        # instead of mocking out contents of files, just mock out
        # what _parse_task_{syslog,stderr}() should return, and have
        # _cat_log_lines() just pass through the path
        self.mock_paths = []
        self.path_to_mock_result = {}
        self.mock_log_callback = Mock()

        def mock_cat_log_lines(fs, path):
            if path in self.mock_paths:
                return path

        # (the actual log-parsing functions take lines from the log)
        def mock_parse_task_syslog(path_from_mock_cat_log_lines):
            # default is {}
            return self.path_to_mock_result.get(
                path_from_mock_cat_log_lines, {})

        def mock_parse_task_stderr(path_from_mock_cat_log_lines):
            # default is None
            return self.path_to_mock_result.get(path_from_mock_cat_log_lines)

        def mock_exists(path):
            return path in self.mock_paths or path == 'MOCK_LOG_DIR'

        # need to mock ls so that _ls_spark_task_logs() can work
        def mock_ls(log_dir):
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log_lines = self.start(
            patch('mrjob.logs.task._cat_log_lines',
                  side_effect=mock_cat_log_lines))

        self.start(patch('mrjob.logs.task._parse_task_syslog',
                         side_effect=mock_parse_task_syslog))
        self.start(patch('mrjob.logs.task._parse_task_stderr',
                         side_effect=mock_parse_task_stderr))

    def mock_path_matches(self):
        mock_log_dir_stream = [['MOCK_LOG_DIR']]  # _ls_logs() needs this
        return _ls_spark_task_logs(self.mock_fs, mock_log_dir_stream)

    def interpret_spark_task_logs(self, **kwargs):
        return _interpret_spark_task_logs(
            self.mock_fs, self.mock_path_matches(),
            log_callback=self.mock_log_callback, **kwargs)

    def test_empty(self):
        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_no_error(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_split_only(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(split=dict(path='best_input_file_ever'))
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_executor_error(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='BOOM',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

    def test_stderr_with_executor_error_and_split(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM'),
                              split=dict(path='best_input_file_ever')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='BOOM',
                            path=stderr_path,
                        ),
                        split=dict(path='best_input_file_ever'),
                    ),
                ],
                partial=True,
            ))

    def test_stderr_with_application_exited_and_stdout(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]
        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                        task_error=dict(
                            message='because, exploding code',
                            path=stdout_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_empty_stdout(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]
        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_no_stdout(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]
        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path)])

    def test_error_in_stdout_only(self):
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]
        self.path_to_mock_result = {
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path)])

    # indirectly tests _ls_spark_task_logs() and its ability to sort by
    # log type and recency
    def test_multiple_logs(self):
        stdout1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stdout')
        stderr1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stderr')
        stdout2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stdout')
        stderr2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stderr')
        stdout3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stdout')
        stderr3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stderr')
        stderr4_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [
            stdout1_path,
            stderr1_path,
            stdout2_path,
            stderr2_path,
            stdout3_path,
            stderr3_path,
            stderr4_path,
        ]

        self.path_to_mock_result = {
            stderr1_path: dict(
                hadoop_error=dict(message='BOOM1')),
            stderr2_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='exited with status 2')),
            stdout2_path: dict(message='BoomException'),
            stderr4_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='exited with status 4')),
            # no errors for stdout1_path, stdout3_path, or stderr3_path
        }

        # we should yield from stderr2_path first (latest task number that
        # has a corresponding stdout)
        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000002',
                        hadoop_error=dict(
                            message='exited with status 2',
                            path=stderr2_path,
                        ),
                        task_error=dict(
                            message='BoomException',
                            path=stdout2_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list, [
            call(stderr3_path),
            call(stderr2_path),
            call(stdout2_path),
        ])

        # try again, with partial=False
        self.mock_log_callback.reset_mock()

        # paths still get sorted by _ls_logs()
        self.assertEqual(
            self.interpret_spark_task_logs(partial=False),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000002',
                        hadoop_error=dict(
                            message='exited with status 2',
                            path=stderr2_path,
                        ),
                        task_error=dict(
                            message='BoomException',
                            path=stdout2_path,
                        ),
                    ),
                    dict(
                        container_id='container_1450486922681_0005_01_000001',
                        hadoop_error=dict(
                            message='BOOM1',
                            path=stderr1_path,
                        ),
                    ),
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='exited with status 4',
                            path=stderr4_path,
                        ),
                    ),
                ],
            ))

        self.assertEqual(self.mock_log_callback.call_args_list, [
            call(stderr3_path),
            call(stderr2_path),
            call(stdout2_path),
            call(stderr1_path),
            call(stderr4_path),
        ])
def setUp(self):
    super(PickCountersTestCase, self).setUp()

    self.runner._interpret_history_log = Mock()
    self.runner._interpret_step_logs = Mock()
def make_mock_mrc_job(mrc, step_num):
    job = Mock()
    job.pick_protocols.return_value = (Mock(), Mock())

    return job