Ejemplo n.º 1
0
    def test_syslog_with_corresponding_stderr(self):
        """An error found in a syslog picks up extra detail from the
        stderr of the same attempt."""
        syslog_path = "/userlogs/attempt_201512232143_0008_m_000001_3/syslog"
        stderr_path = "/userlogs/attempt_201512232143_0008_m_000001_3/stderr"
        stderr_cb = Mock()

        self.mock_paths = [syslog_path, stderr_path]

        self.path_to_mock_result = {
            syslog_path: {"hadoop_error": {"message": "BOOM"}},
            stderr_path: {"message": "because, exploding code"},
        }

        expected = {
            "errors": [
                {
                    "attempt_id": "attempt_201512232143_0008_m_000001_3",
                    "hadoop_error": {"message": "BOOM", "path": syslog_path},
                    "task_error": {
                        "message": "because, exploding code",
                        "path": stderr_path,
                    },
                    "task_id": "task_201512232143_0008_m_000001",
                }
            ],
            "partial": True,
        }

        self.assertEqual(
            self.interpret_task_logs(stderr_callback=stderr_cb), expected)

        stderr_cb.assert_called_once_with(stderr_path)
Ejemplo n.º 2
0
    def test_syslog_with_corresponding_stderr(self):
        """A syslog error is paired with the stderr from the same attempt."""
        syslog_path = '/userlogs/attempt_201512232143_0008_m_000001_3/syslog'
        stderr_path = '/userlogs/attempt_201512232143_0008_m_000001_3/stderr'
        stderr_cb = Mock()

        self.mock_paths = [syslog_path, stderr_path]
        self.path_to_mock_result = {
            syslog_path: {'hadoop_error': {'message': 'BOOM'}},
            stderr_path: {'message': 'because, exploding code'},
        }

        result = self.interpret_task_logs(stderr_callback=stderr_cb)

        self.assertEqual(result, dict(
            errors=[
                dict(
                    attempt_id='attempt_201512232143_0008_m_000001_3',
                    hadoop_error=dict(message='BOOM', path=syslog_path),
                    task_error=dict(
                        message='because, exploding code',
                        path=stderr_path,
                    ),
                    task_id='task_201512232143_0008_m_000001',
                ),
            ],
            partial=True,
        ))

        stderr_cb.assert_called_once_with(stderr_path)
Ejemplo n.º 3
0
 def test_try_till_success(self):
     """With max_tries=0, keep retrying until the call finally succeeds."""
     target = Mock()
     target.f = Mock(__name__='f', side_effect=[IOError, IOError, None])
     wrapper = RetryWrapper(target,
                            retry_if=lambda x: True,
                            backoff=0.0001,
                            max_tries=0)
     wrapper.f()
     # two failures plus the final success
     self.assertEqual(target.f.call_count, 3)
Ejemplo n.º 4
0
    def test_wrapping(self):
        """Fail over from a broken alternative to a working one,
        forwarding args and preserving __name__."""
        broken = Mock()
        broken.f = Mock(__name__='f', side_effect=IOError)
        working = Mock()
        working.f = Mock(__name__='f', return_value=2)

        wrapped = RetryGoRound(
            [broken, working], lambda ex: isinstance(ex, IOError))

        self.assertEqual(wrapped.f('foo', bar='baz'), 2)
        broken.f.assert_called_once_with('foo', bar='baz')
        working.f.assert_called_once_with('foo', bar='baz')
        self.assertEqual(wrapped.f.__name__, 'f')
Ejemplo n.º 5
0
    def test_success(self):
        """A call that works on the first try is made exactly once."""
        target = Mock()
        target.f = Mock(__name__='f', side_effect=None)
        wrapper = RetryWrapper(target,
                               retry_if=lambda x: True,
                               backoff=0.0001,
                               max_tries=2)

        wrapper.f()
        target.f.assert_called_once_with()
Ejemplo n.º 6
0
    def test_failure(self):
        """One retriable failure, then success on the second try."""
        target = Mock()
        target.f = Mock(__name__='f', side_effect=[IOError, 1])
        wrapper = RetryWrapper(target,
                               retry_if=lambda x: True,
                               backoff=0.0001,
                               max_tries=2)

        self.assertEqual(wrapper.f(), 1)
        self.assertEqual(target.f.call_count, 2)
Ejemplo n.º 7
0
 def test_failure_raises_if_all_tries_fail(self):
     """Once max_tries is exhausted, the last error propagates."""
     target = Mock()
     target.f = Mock(__name__='f', side_effect=[IOError, IOError])
     wrapper = RetryWrapper(target,
                            retry_if=lambda x: True,
                            backoff=0.0001,
                            max_tries=2)
     with self.assertRaises(IOError):
         wrapper.f()
     self.assertEqual(target.f.call_count, 2)
Ejemplo n.º 8
0
    def test_can_wrap_around(self):
        """After cycling past the last alternative, start over at the first."""
        first = Mock()
        first.f = Mock(__name__='f', side_effect=[IOError, 1])
        second = Mock()
        second.f = Mock(__name__='f', side_effect=[2, IOError])

        wrapped = RetryGoRound(
            [first, second], lambda ex: isinstance(ex, IOError))

        self.assertEqual(wrapped.f(), 2)
        self.assertEqual(wrapped.f(), 1)

        self.assertEqual(first.f.call_count, 2)
        self.assertEqual(second.f.call_count, 2)
Ejemplo n.º 9
0
    def test_success(self):
        """If the first alternative works, the second is never touched."""
        first = Mock()
        # __name__ is required so wraps() will work
        first.f = Mock(__name__='f', return_value=1)
        second = Mock()
        second.f = Mock(__name__='f', return_value=2)

        wrapped = RetryGoRound(
            [first, second], lambda ex: isinstance(ex, IOError))

        self.assertEqual(wrapped.f(), 1)
        self.assertEqual(first.f.call_count, 1)
        # never needed to try the second alternative
        self.assertEqual(second.f.call_count, 0)
Ejemplo n.º 10
0
    def test_unrecoverable_error(self):
        """Errors that retry_if rejects are raised, not retried."""
        first = Mock()
        first.f = Mock(__name__='f', side_effect=ValueError)
        second = Mock()
        second.f = Mock(__name__='f', return_value=2)

        wrapped = RetryGoRound(
            [first, second], lambda ex: isinstance(ex, IOError))

        self.assertRaises(ValueError, wrapped.f)
        self.assertRaises(ValueError, wrapped.f)

        # ValueError is not retriable, so we never moved on to second
        self.assertEqual(first.f.call_count, 2)
        self.assertEqual(second.f.call_count, 0)
Ejemplo n.º 11
0
    def test_python3_jupyter_notebook(self):
        """Regression test for #1441.

        This works on any Python platform, since we use mocks.
        """
        mock_stdin = Mock()
        mock_stdin.buffer = Mock()

        # simulate output streams that have no .buffer attribute
        mock_stdout = Mock()
        del mock_stdout.buffer
        mock_stderr = Mock()
        del mock_stderr.buffer

        with patch.multiple(sys, stdin=mock_stdin,
                            stdout=mock_stdout, stderr=mock_stderr):
            launcher = MRJobLauncher(args=['/path/to/script'])

        # stdin uses .buffer; stdout/stderr fall back to the stream itself
        self.assertEqual(launcher.stdin, mock_stdin.buffer)
        self.assertEqual(launcher.stdout, mock_stdout)
        self.assertEqual(launcher.stderr, mock_stderr)
Ejemplo n.º 12
0
    def setUp(self):
        """Build mock filesystems, each claiming a different path family."""
        super(CompositeFilesystemTestCase, self).setUp()

        self.log = self.start(patch('mrjob.fs.composite.log'))

        # local fs handles anything that isn't a URI
        self.local_fs = Mock(spec=Filesystem)
        self.local_fs.can_handle_path.side_effect = lambda p: not is_uri(p)

        # hadoop fs handles all URIs
        self.hadoop_fs = Mock(spec=Filesystem)
        self.hadoop_fs.get_hadoop_version = Mock()
        self.hadoop_fs.can_handle_path.side_effect = is_uri

        # S3 fs handles s3:// URIs only
        self.s3_fs = Mock(spec=Filesystem)
        self.s3_fs.create_bucket = Mock()
        self.s3_fs.can_handle_path.side_effect = is_s3_uri
Ejemplo n.º 13
0
    def setUp(self):
        """Patch log parsing so tests deal in paths, not file contents.

        Rather than mock out the contents of log files, mock what
        _parse_task_{syslog,stderr}() should return and have
        _cat_log_lines() pass the path straight through.
        """
        super(InterpretSparkTaskLogsTestCase, self).setUp()

        self.mock_paths = []
        self.path_to_mock_result = {}

        self.mock_log_callback = Mock()

        def passthrough_cat(fs, path):
            # stand-in for _cat_log_lines(): just hand back the path
            if path in self.mock_paths:
                return path

        def fake_parse_syslog(path):
            # (the real parser takes log lines); default is {}
            return self.path_to_mock_result.get(path, {})

        def fake_parse_stderr(path):
            # (the real parser takes log lines); default is None
            return self.path_to_mock_result.get(path)

        def fake_exists(path):
            return path == 'MOCK_LOG_DIR' or path in self.mock_paths

        def fake_ls(log_dir):
            # mocked so that _ls_spark_task_logs() can work
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=fake_exists)
        self.mock_fs.ls = Mock(side_effect=fake_ls)

        self.mock_cat_log_lines = self.start(
            patch('mrjob.logs.task._cat_log_lines',
                  side_effect=passthrough_cat))
        self.start(patch('mrjob.logs.task._parse_task_syslog',
                         side_effect=fake_parse_syslog))
        self.start(patch('mrjob.logs.task._parse_task_stderr',
                         side_effect=fake_parse_stderr))
Ejemplo n.º 14
0
    def test_one_failure(self):
        """Once an alternative fails, attributes come from the next one."""
        first = Mock()
        first.f = Mock(__name__='f', side_effect=IOError)
        first.x = 100
        second = Mock()
        second.f = Mock(__name__='f', return_value=2)
        second.x = 200

        wrapped = RetryGoRound(
            [first, second], lambda ex: isinstance(ex, IOError))

        self.assertEqual(wrapped.x, 100)
        self.assertEqual(wrapped.f(), 2)
        # second was the last alternative that worked, so x now comes from it
        self.assertEqual(wrapped.x, 200)
        # this time first.f() should be skipped entirely
        self.assertEqual(wrapped.f(), 2)

        self.assertEqual(first.f.call_count, 1)
        self.assertEqual(second.f.call_count, 2)
Ejemplo n.º 15
0
    def test_all_fail(self):
        """When every alternative fails, the error propagates each time."""
        first = Mock()
        first.f = Mock(__name__='f', side_effect=IOError)
        first.x = 100
        second = Mock()
        second.f = Mock(__name__='f', side_effect=IOError)
        second.x = 200

        wrapped = RetryGoRound(
            [first, second], lambda ex: isinstance(ex, IOError))

        self.assertEqual(wrapped.x, 100)
        # ran out of alternatives
        self.assertRaises(IOError, wrapped.f)
        # nothing worked, so we're still pointing at the first alternative
        self.assertEqual(wrapped.x, 100)
        # yep, still broken
        self.assertRaises(IOError, wrapped.f)

        self.assertEqual(first.f.call_count, 2)
        self.assertEqual(second.f.call_count, 2)
Ejemplo n.º 16
0
 def assert_bucket_validation(self, boto_version, should_validate):
     """Check that _get_bucket() passes validate=should_validate for the
     given boto version."""
     with patch('boto.Version', boto_version):
         conn = Mock()
         _get_bucket(conn, 'walrus')
         conn.get_bucket.assert_called_once_with(
             'walrus', validate=should_validate)
Ejemplo n.º 17
0
    def setUp(self):
        """Stub out the runner's step-log interpretation hook."""
        super(InterpretStepLogTestCase, self).setUp()
        self.runner._get_step_log_interpretation = Mock()
Ejemplo n.º 18
0
    def setUp(self):
        """Mock out history-log listing and interpretation."""
        super(InterpretHistoryLogTestCase, self).setUp()

        self._interpret_history_log = self.start(
            patch('mrjob.logs.mixin._interpret_history_log'))
        self.runner._ls_history_logs = Mock()
Ejemplo n.º 19
0
class InterpretSparkTaskLogsTestCase(PatcherTestCase):
    """Tests for _interpret_spark_task_logs() against a mocked filesystem."""

    # always show full dict diffs on assertion failure
    maxDiff = None

    def setUp(self):
        """Patch log catting/parsing so tests deal in paths, not contents."""
        super(InterpretSparkTaskLogsTestCase, self).setUp()

        # instead of mocking out contents of files, just mock out
        # what _parse_task_{syslog,stderr}() should return, and have
        # _cat_log() just pass through the path
        self.mock_paths = []
        self.path_to_mock_result = {}

        self.mock_log_callback = Mock()

        def mock_cat_log(fs, path):
            # known paths pass through; unknown paths yield None
            if path in self.mock_paths:
                return path

        # (the actual log-parsing functions take lines from the log)
        def mock_parse_task_syslog(path_from_mock_cat_log):
            # default is {}
            return self.path_to_mock_result.get(path_from_mock_cat_log, {})

        def mock_parse_task_stderr(path_from_mock_cat_log):
            # default is None
            return self.path_to_mock_result.get(path_from_mock_cat_log)

        def mock_exists(path):
            return path in self.mock_paths or path == 'MOCK_LOG_DIR'

        # need to mock ls so that _ls_spark_task_logs() can work
        def mock_ls(log_dir):
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log = self.start(
            patch('mrjob.logs.task._cat_log', side_effect=mock_cat_log))

        self.start(
            patch('mrjob.logs.task._parse_task_syslog',
                  side_effect=mock_parse_task_syslog))
        self.start(
            patch('mrjob.logs.task._parse_task_stderr',
                  side_effect=mock_parse_task_stderr))

    def mock_path_matches(self):
        """Return matches for task log paths under the mock log dir."""
        mock_log_dir_stream = [['MOCK_LOG_DIR']]  # _ls_logs() needs this
        return _ls_spark_task_logs(self.mock_fs, mock_log_dir_stream)

    def interpret_spark_task_logs(self, **kwargs):
        """Run _interpret_spark_task_logs() against the mocked filesystem."""
        return _interpret_spark_task_logs(self.mock_fs,
                                          self.mock_path_matches(),
                                          log_callback=self.mock_log_callback,
                                          **kwargs)

    def test_empty(self):
        """No log paths at all means no errors found."""
        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_no_error(self):
        """A stderr log that parses to nothing produces no errors."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_split_only(self):
        """A split with no accompanying error is ignored."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path: dict(split=dict(path='best_input_file_ever'))
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_executor_error(self):
        """A hadoop_error in stderr is reported with its container ID."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='BOOM',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

    def test_stderr_with_executor_error_and_split(self):
        """A hadoop_error plus split info are both included in the error."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path:
            dict(hadoop_error=dict(message='BOOM'),
                 split=dict(path='best_input_file_ever')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='BOOM',
                            path=stderr_path,
                        ),
                        split=dict(path='best_input_file_ever'),
                    ),
                ],
                partial=True,
            ))

    def test_stderr_with_application_exited_and_stdout(self):
        """When stderr says to check stdout, a stdout error becomes
        the task_error."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]

        self.path_to_mock_result = {
            stderr_path:
            dict(check_stdout=True,
                 hadoop_error=dict(message='application exited')),
            stdout_path:
            dict(message='because, exploding code')
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                        task_error=dict(
                            message='because, exploding code',
                            path=stdout_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_empty_stdout(self):
        """If stdout parses to nothing, the error still reads stdout but
        includes no task_error."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]

        self.path_to_mock_result = {
            stderr_path:
            dict(check_stdout=True,
                 hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_no_stdout(self):
        """If there is no stdout log at all, only stderr is read."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path:
            dict(check_stdout=True,
                 hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path)])

    def test_error_in_stdout_only(self):
        """A stdout error with no stderr error is not reported; stdout
        is never even read."""
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]

        self.path_to_mock_result = {
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr_path)])

    # indirectly tests _ls_spark_task_logs() and its ability to sort by
    # log type and recency
    def test_multiple_logs(self):
        """With several containers, errors are found in recency order;
        partial=True stops at the first error."""
        stdout1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stdout')
        stderr1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stderr')
        stdout2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stdout')
        stderr2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stderr')
        stdout3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stdout')
        stderr3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stderr')
        stderr4_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [
            stdout1_path,
            stderr1_path,
            stdout2_path,
            stderr2_path,
            stdout3_path,
            stderr3_path,
            stderr4_path,
        ]

        self.path_to_mock_result = {
            stderr1_path:
            dict(hadoop_error=dict(message='BOOM1')),
            stderr2_path:
            dict(check_stdout=True,
                 hadoop_error=dict(message='exited with status 2')),
            stdout2_path:
            dict(message='BoomException'),
            stderr4_path:
            dict(check_stdout=True,
                 hadoop_error=dict(message='exited with status 4')),
            # no errors for stdout1_path, stdout3_path, or stderr3_path
        }

        # we should read from stderr4_path first (later task number)
        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='exited with status 4',
                            path=stderr4_path,
                        ),
                    ),
                ],
                partial=True,
            ))

        self.assertEqual(self.mock_log_callback.call_args_list,
                         [call(stderr4_path)])

        # try again, with partial=False
        self.mock_log_callback.reset_mock()

        # paths still get sorted by _ls_logs()
        self.assertEqual(
            self.interpret_spark_task_logs(partial=False),
            dict(errors=[
                dict(
                    container_id='container_1450486922681_0005_01_000004',
                    hadoop_error=dict(
                        message='exited with status 4',
                        path=stderr4_path,
                    ),
                ),
                dict(
                    container_id='container_1450486922681_0005_01_000002',
                    hadoop_error=dict(
                        message='exited with status 2',
                        path=stderr2_path,
                    ),
                    task_error=dict(
                        message='BoomException',
                        path=stdout2_path,
                    ),
                ),
                dict(
                    container_id='container_1450486922681_0005_01_000001',
                    hadoop_error=dict(
                        message='BOOM1',
                        path=stderr1_path,
                    ),
                ),
            ], ))

        self.assertEqual(self.mock_log_callback.call_args_list, [
            call(stderr4_path),
            call(stderr3_path),
            call(stderr2_path),
            call(stdout2_path),
            call(stderr1_path),
        ])
Ejemplo n.º 20
0
    def setUp(self):
        """Mock out history-log streaming and listing."""
        super(LsHistoryLogsTestCase, self).setUp()

        self.runner._stream_history_log_dirs = Mock()
        self._ls_history_logs = self.start(
            patch('mrjob.logs.mixin._ls_history_logs'))
Ejemplo n.º 21
0
class CompositeFilesystemTestCase(BasicTestCase):
    """Tests for CompositeFilesystem's dispatch to per-scheme filesystems."""

    def setUp(self):
        """Build mock filesystems, each claiming a different path family."""
        super(CompositeFilesystemTestCase, self).setUp()

        self.log = self.start(patch('mrjob.fs.composite.log'))

        # hadoop fs claims all URIs
        self.hadoop_fs = Mock(spec=Filesystem)
        self.hadoop_fs.get_hadoop_version = Mock()
        self.hadoop_fs.can_handle_path.side_effect = is_uri

        # local fs claims anything that isn't a URI
        self.local_fs = Mock(spec=Filesystem)
        self.local_fs.can_handle_path.side_effect = lambda p: not is_uri(p)

        # s3 fs claims s3:// URIs only
        self.s3_fs = Mock(spec=Filesystem)
        self.s3_fs.create_bucket = Mock()
        self.s3_fs.can_handle_path.side_effect = is_s3_uri

    def test_empty_fs(self):
        """With no filesystems added, nothing is handled and ls() fails."""
        fs = CompositeFilesystem()

        self.assertFalse(fs.can_handle_path('s3://walrus/fish'))
        self.assertFalse(fs.can_handle_path('/'))

        self.assertRaises(IOError, fs.ls, '/')

    def test_pick_fs(self):
        """Calls go to the first filesystem that can handle the path."""
        fs = CompositeFilesystem()

        fs.add_fs('s3', self.s3_fs)
        fs.add_fs('hadoop', self.hadoop_fs)

        self.assertEqual(fs.ls('s3://walrus/fish'),
                         self.s3_fs.ls.return_value)
        # hadoop fs could have handled it, but s3_fs got it first
        self.assertTrue(self.hadoop_fs.can_handle_path('s3://walrus/fish'))
        self.assertFalse(self.hadoop_fs.ls.called)

        self.assertEqual(fs.ls('hdfs:///user/hadoop/'),
                         self.hadoop_fs.ls.return_value)

        # don't move on to the next FS on an error (unlike old
        # CompositeFilesystem implementation)
        self.s3_fs.ls.side_effect = IOError

        self.assertRaises(IOError, fs.ls, 's3://walrus/fish')

    def test_forward_join(self):
        """join() dispatches on its first path argument."""
        # join() is a special case since it takes multiple arguments
        fs = CompositeFilesystem()

        fs.add_fs('s3', self.s3_fs)

        self.assertEqual(fs.join('s3://walrus/fish', 'salmon'),
                         self.s3_fs.join.return_value)
        self.s3_fs.join.assert_called_once_with(
            's3://walrus/fish', 'salmon')

    def test_forward_put(self):
        """put() dispatches on its destination (second) argument."""
        # put() is a special case since the path that matters comes second
        fs = CompositeFilesystem()

        fs.add_fs('s3', self.s3_fs)

        fs.put('/path/to/file', 's3://walrus/file')
        self.s3_fs.put.assert_called_once_with(
            '/path/to/file', 's3://walrus/file')

    def test_forward_fs_extensions(self):
        """Extension methods are looked up on the member filesystems."""
        fs = CompositeFilesystem()

        fs.add_fs('s3', self.s3_fs)
        fs.add_fs('hadoop', self.hadoop_fs)

        self.assertEqual(fs.create_bucket, self.s3_fs.create_bucket)
        self.assertEqual(fs.get_hadoop_version,
                         self.hadoop_fs.get_hadoop_version)

        self.assertRaises(AttributeError, lambda: fs.client)

    def test_disable_fs(self):
        """A filesystem that raises its disable_if exception gets disabled
        and skipped thereafter."""
        class NoCredentialsError(Exception):
            pass

        fs = CompositeFilesystem()

        # tentatively use S3 filesystem, if set up
        fs.add_fs('s3', self.s3_fs,
                  disable_if=lambda ex: isinstance(ex, NoCredentialsError))
        fs.add_fs('hadoop', self.hadoop_fs)

        self.s3_fs.ls.side_effect = NoCredentialsError

        # calling ls() on S3 fs disables it, so we move on to hadoop fs
        self.assertEqual(fs.ls('s3://walrus/'),
                         self.hadoop_fs.ls.return_value)
        self.assertTrue(self.s3_fs.ls.called)

        self.assertIn('s3', fs._disabled)

        # now that s3 fs is disabled, we won't even try to call it
        self.assertEqual(fs.cat('s3://walrus/fish'),
                         self.hadoop_fs.cat.return_value)
        self.assertFalse(self.s3_fs.cat.called)
Ejemplo n.º 22
0
    def create_channel(self, target, credentials=None):
        """Return a mock channel whose _channel.target() reports *target*.

        *credentials* is accepted for interface compatibility but unused.
        """
        inner = Mock()
        inner.target = Mock(return_value=target)

        channel = Mock()
        channel._channel = inner
        return channel
Ejemplo n.º 23
0
class InterpretSparkTaskLogsTestCase(BasicTestCase):
    """Test _interpret_spark_task_logs() against mocked-out log parsing.

    Rather than creating real log files, these tests patch the parsing
    helpers so that a log *path* maps directly to the parse result it
    should produce (see :py:meth:`setUp`).
    """

    # show full diffs for the large nested dicts asserted below
    maxDiff = None

    def setUp(self):
        """Patch log catting/parsing so paths map straight to mock results."""
        super(InterpretSparkTaskLogsTestCase, self).setUp()

        # instead of mocking out contents of files, just mock out
        # what _parse_task_{syslog,stderr}() should return, and have
        # _cat_log_lines() just pass through the path
        self.mock_paths = []
        self.path_to_mock_result = {}

        self.mock_log_callback = Mock()

        def mock_cat_log_lines(fs, path):
            # pass the path through; the mock parsers below key off it
            if path in self.mock_paths:
                return path

        # (the actual log-parsing functions take lines from the log)
        def mock_parse_task_syslog(path_from_mock_cat_log_lines):
            # default is {}
            return self.path_to_mock_result.get(
                path_from_mock_cat_log_lines, {})

        def mock_parse_task_stderr(path_from_mock_cat_log_lines):
            # default is None
            return self.path_to_mock_result.get(path_from_mock_cat_log_lines)

        def mock_exists(path):
            return path in self.mock_paths or path == 'MOCK_LOG_DIR'

        # need to mock ls so that _ls_spark_task_logs() can work
        def mock_ls(log_dir):
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log_lines = self.start(
            patch('mrjob.logs.task._cat_log_lines',
                  side_effect=mock_cat_log_lines))

        self.start(patch('mrjob.logs.task._parse_task_syslog',
                         side_effect=mock_parse_task_syslog))
        self.start(patch('mrjob.logs.task._parse_task_stderr',
                         side_effect=mock_parse_task_stderr))

    def mock_path_matches(self):
        """Return path matches from _ls_spark_task_logs() over mock paths."""
        mock_log_dir_stream = [['MOCK_LOG_DIR']]  # _ls_logs() needs this
        return _ls_spark_task_logs(self.mock_fs, mock_log_dir_stream)

    def interpret_spark_task_logs(self, **kwargs):
        """Run _interpret_spark_task_logs() with the mocked fs and callback."""
        return _interpret_spark_task_logs(
            self.mock_fs, self.mock_path_matches(),
            log_callback=self.mock_log_callback,
            **kwargs)

    def test_empty(self):
        # no log paths at all -> no errors found
        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_no_error(self):
        # a stderr log that parses to nothing produces no errors
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_split_only(self):
        # a split (input file info) alone, with no error, isn't reported
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path: dict(split=dict(path='best_input_file_ever'))
        }

        self.assertEqual(self.interpret_spark_task_logs(), {})

    def test_stderr_with_executor_error(self):
        # a hadoop_error in stderr is reported, tagged with its container ID
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM')),
        }

        self.assertEqual(self.interpret_spark_task_logs(), dict(
            errors=[
                dict(
                    container_id='container_1450486922681_0005_01_000004',
                    hadoop_error=dict(
                        message='BOOM',
                        path=stderr_path,
                    ),
                ),
            ],
            partial=True,
        ))

    def test_stderr_with_executor_error_and_split(self):
        # split info from the same stderr is attached to the error
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path: dict(hadoop_error=dict(message='BOOM'),
                              split=dict(path='best_input_file_ever')),
        }

        self.assertEqual(self.interpret_spark_task_logs(), dict(
            errors=[
                dict(
                    container_id='container_1450486922681_0005_01_000004',
                    hadoop_error=dict(
                        message='BOOM',
                        path=stderr_path,
                    ),
                    split=dict(path='best_input_file_ever'),
                ),
            ],
            partial=True,
        ))

    def test_stderr_with_application_exited_and_stdout(self):
        # check_stdout makes us also parse the matching stdout for a
        # task_error explaining why the application exited
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]

        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                        task_error=dict(
                            message='because, exploding code',
                            path=stdout_path,
                        ),
                    ),
                ],
                partial=True,
            )
        )

        # both logs should have been reported to the log callback, in order
        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_empty_stdout(self):
        # stdout exists but parses to nothing: error reported without
        # a task_error, though stdout was still read (see callback below)
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]

        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            )
        )

        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [call(stderr_path), call(stdout_path)])

    def test_stderr_with_application_exited_and_no_stdout(self):
        # no stdout log at all: error reported from stderr alone
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [stderr_path]

        self.path_to_mock_result = {
            stderr_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='application exited')),
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            dict(
                errors=[
                    dict(
                        container_id='container_1450486922681_0005_01_000004',
                        hadoop_error=dict(
                            message='application exited',
                            path=stderr_path,
                        ),
                    ),
                ],
                partial=True,
            )
        )

        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [call(stderr_path)])

    def test_error_in_stdout_only(self):
        # a task error in stdout with no hadoop_error in stderr is ignored;
        # stdout isn't even read (no call(stdout_path) below)
        stderr_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stderr')
        stdout_path = ('/log/dir/userlogs/application_1450486922681_0005'
                       '/container_1450486922681_0005_01_000004/stdout')

        self.mock_paths = [stderr_path, stdout_path]

        self.path_to_mock_result = {
            stdout_path: dict(message='because, exploding code')
        }

        self.assertEqual(
            self.interpret_spark_task_logs(),
            {})

        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [call(stderr_path)])

    # indirectly tests _ls_spark_task_logs() and its ability to sort by
    # log type and recency
    def test_multiple_logs(self):
        stdout1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stdout')
        stderr1_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000001/stderr')
        stdout2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stdout')
        stderr2_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000002/stderr')
        stdout3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stdout')
        stderr3_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000003/stderr')
        stderr4_path = ('/log/dir/userlogs/application_1450486922681_0005'
                        '/container_1450486922681_0005_01_000004/stderr')

        self.mock_paths = [
            stdout1_path,
            stderr1_path,
            stdout2_path,
            stderr2_path,
            stdout3_path,
            stderr3_path,
            stderr4_path,
        ]

        self.path_to_mock_result = {
            stderr1_path: dict(
                hadoop_error=dict(message='BOOM1')),
            stderr2_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='exited with status 2')),
            stdout2_path: dict(message='BoomException'),
            stderr4_path: dict(
                check_stdout=True,
                hadoop_error=dict(message='exited with status 4')),
            # no errors for stdout1_path, stdout3_path, or stderr4_path
        }

        # we should yield from stderr2_path first (latest task number that
        # has a corresponding stdout)
        self.assertEqual(self.interpret_spark_task_logs(), dict(
            errors=[
                dict(
                    container_id='container_1450486922681_0005_01_000002',
                    hadoop_error=dict(
                        message='exited with status 2',
                        path=stderr2_path,
                    ),
                    task_error=dict(
                        message='BoomException',
                        path=stdout2_path,
                    ),
                ),
            ],
            partial=True,
        ))

        self.assertEqual(self.mock_log_callback.call_args_list, [
            call(stderr3_path),
            call(stderr2_path),
            call(stdout2_path),
        ])

        # try again, with partial=False
        self.mock_log_callback.reset_mock()

        # paths still get sorted by _ls_logs()
        self.assertEqual(self.interpret_spark_task_logs(partial=False), dict(
            errors=[
                dict(
                    container_id='container_1450486922681_0005_01_000002',
                    hadoop_error=dict(
                        message='exited with status 2',
                        path=stderr2_path,
                    ),
                    task_error=dict(
                        message='BoomException',
                        path=stdout2_path,
                    ),
                ),
                dict(
                    container_id='container_1450486922681_0005_01_000001',
                    hadoop_error=dict(
                        message='BOOM1',
                        path=stderr1_path,
                    ),
                ),
                dict(
                    container_id='container_1450486922681_0005_01_000004',
                    hadoop_error=dict(
                        message='exited with status 4',
                        path=stderr4_path,
                    ),
                ),
            ],
        ))

        self.assertEqual(
            self.mock_log_callback.call_args_list,
            [
                call(stderr3_path),
                call(stderr2_path),
                call(stdout2_path),
                call(stderr1_path),
                call(stderr4_path),
            ]
        )
# Example 24
    def setUp(self):
        """Stub out log interpretation so tests control counter data."""
        super(PickCountersTestCase, self).setUp()

        for method_name in ('_interpret_history_log', '_interpret_step_logs'):
            setattr(self.runner, method_name, Mock())
# Example 25
        def make_mock_mrc_job(mrc, step_num):
            """Return a Mock job whose pick_protocols() gives (read, write) mocks."""
            mock_job = Mock()
            mock_job.pick_protocols = Mock(return_value=(Mock(), Mock()))

            return mock_job
# Example 26 — from case.py in the Yelp/mrjob project
    def create_channel(self, target, credentials=None):
        """Return a stand-in gRPC channel; _channel.target() echoes *target*.

        *credentials* is accepted for signature compatibility but unused.
        """
        inner_channel = Mock()
        inner_channel.target = Mock(return_value=target)

        grpc_channel = Mock()
        grpc_channel._channel = inner_channel
        return grpc_channel