def setUp(self):
        """Stub out the runner's log helpers before each test.

        Replaces ``_ls_task_syslogs`` and ``get_hadoop_version`` on
        ``self.runner`` with mocks, and patches
        ``mrjob.logs.mixin._interpret_task_logs``, keeping whatever
        ``self.start()`` returns for that patch on
        ``self._interpret_task_logs``.
        """
        super(InterpretTaskLogsTestCase, self).setUp()

        # the mocked runner always reports Hadoop version 2.7.1
        self.runner.get_hadoop_version = Mock(return_value='2.7.1')
        self.runner._ls_task_syslogs = Mock()

        interpret_patcher = patch('mrjob.logs.mixin._interpret_task_logs')
        self._interpret_task_logs = self.start(interpret_patcher)
Example #2
0
    def _test_run_combiner(self, sort_values=False, num_reducers=None):
        """Run _run_combiner() against a mock RDD and inspect its calls.

        Asserts that the RDD handed back is the mock RDD itself, that
        combineByKey() was called, and that every call recorded after it
        either is a mapValues() whose function passes
        ``_assert_maps_list_to_list_of_same_size()`` or was made with
        ``preservesPartitioning=True``.
        """
        mock_rdd = self.mock_rdd()

        job = Mock()
        job.pick_protocols.return_value = (Mock(), Mock())

        result_rdd = _run_combiner(
            job, mock_rdd, sort_values=sort_values, num_reducers=num_reducers)
        # each method of the mock RDD hands back the RDD itself
        self.assertEqual(result_rdd, mock_rdd)

        # Partitions have to be preserved once combineByKey() has run.
        #
        # The mock modules in Python 3.4 and 3.5 track calls slightly
        # differently, so rather than relying on assert_called() we walk
        # `method_calls` by hand.
        seen_combineByKey = False
        for method_name, call_args, call_kwargs in mock_rdd.method_calls:
            if not seen_combineByKey:
                # nothing is checked up to and including combineByKey()
                if method_name == 'combineByKey':
                    seen_combineByKey = True
                continue

            if method_name == 'mapValues':
                # mapValues() only encodes each key's list of values, so
                # it is exempt from the preservesPartitioning requirement
                self._assert_maps_list_to_list_of_same_size(call_args[0])
            else:
                self.assertEqual(
                    call_kwargs.get('preservesPartitioning'), True)

        # make sure combineByKey() really was invoked
        self.assertTrue(seen_combineByKey)
Example #3
0
    def setUp(self):
        """Build a fake filesystem and patch the step-log helpers.

        Rather than faking file contents, ``_cat_log()`` is patched to hand
        back the path it was given, and ``_parse_task_stderr()`` is patched
        to look that path up in ``self.path_to_mock_result``. Tests fill in
        ``self.mock_paths`` and ``self.path_to_mock_result``, and can check
        ``self.mock_paths_catted`` afterwards.
        """
        super(InterpretEMRStepStderrTestCase, self).setUp()

        self.mock_paths = []
        self.path_to_mock_result = {}

        # every known path that was passed to the patched _cat_log()
        self.mock_paths_catted = []

        def mock_cat_log(fs, path):
            if path in self.mock_paths:
                self.mock_paths_catted.append(path)
            return path

        def mock_parse_task_stderr(path_from_mock_cat_log):
            # paths without a canned result parse to None
            return self.path_to_mock_result.get(path_from_mock_cat_log)

        def mock_exists(path):
            return path in self.mock_paths

        def mock_ls(log_dir):
            # log_dir is ignored; every mock path is always listed
            return self.mock_paths

        self.mock_fs = Mock()
        # mock_exists() used to be defined but never attached, which left
        # exists() returning a (truthy) Mock for any path at all
        self.mock_fs.exists = Mock(side_effect=mock_exists)
        self.mock_fs.ls = Mock(side_effect=mock_ls)

        self.mock_cat_log = self.start(
            patch('mrjob.logs.step._cat_log', side_effect=mock_cat_log))

        self.start(patch('mrjob.logs.step._parse_task_stderr',
                         side_effect=mock_parse_task_stderr))
Example #4
0
    def setUp(self):
        """Create a mock filesystem backed by ``self.mock_paths``.

        ``ls()`` yields the mock paths found under the requested directory,
        re-raises any exception instance it meets in the list, and raises
        IOError when nothing matched. ``exists()`` is built on top of
        ``ls()``. ``self.mock_matcher`` echoes back its keyword arguments.
        """
        super(LsLogsTestCase, self).setUp()

        self.mock_paths = []
        self.mock_fs = Mock()

        def fake_ls(log_dir):
            dir_prefix = log_dir.rstrip('/') + '/'
            found_any = False

            for entry in self.mock_paths:
                if isinstance(entry, Exception):
                    raise entry
                if entry.startswith(dir_prefix):
                    yield entry
                    found_any = True

            if not found_any:
                raise IOError

        def fake_exists(log_dir):
            # True as soon as ls() yields a truthy path; an IOError raised
            # by ls() propagates to the caller
            return any(fake_ls(log_dir))

        def passthrough_matcher(path, **kwargs):
            # hand the keyword arguments straight back as a new dict
            return dict(kwargs)

        self.mock_fs.ls = Mock(side_effect=fake_ls)
        self.mock_fs.exists = Mock(side_effect=fake_exists)
        self.mock_matcher = Mock(side_effect=passthrough_matcher)
Example #5
0
    def _test_run_reducer(self, num_reducers=None):
        """Run _run_reducer() against a mock RDD and inspect its calls.

        Asserts that the RDD handed back is the mock RDD itself, that
        mapPartitions() was called, and that each recorded call used the
        expected ``preservesPartitioning`` value: True before
        mapPartitions(), and ``bool(num_reducers)`` from mapPartitions()
        onwards.
        """
        mock_rdd = self.mock_rdd()

        job = Mock()
        job.pick_protocols.return_value = (Mock(), Mock())

        result_rdd = _run_reducer(job, mock_rdd, num_reducers=num_reducers)
        # each method of the mock RDD hands back the RDD itself
        self.assertEqual(result_rdd, mock_rdd)

        reached_mapPartitions = False
        for method_name, call_args, call_kwargs in mock_rdd.method_calls:
            if method_name == 'mapPartitions':
                reached_mapPartitions = True

            if reached_mapPartitions:
                # from mapPartitions() on, partitioning only needs to be
                # kept when a specific number of reducers was requested
                expected = bool(num_reducers)
            else:
                # the original partitioning must survive every call made
                # before mapPartitions()
                expected = True

            self.assertEqual(
                call_kwargs.get('preservesPartitioning'), expected)

        # make sure mapPartitions() really was invoked
        self.assertTrue(reached_mapPartitions)
Example #6
0
    def setUp(self):
        """Mock the runner's three log interpreters and patch _pick_error.

        Whatever ``self.start()`` returns for the
        ``mrjob.logs.mixin._pick_error`` patch is kept on
        ``self._pick_error``.
        """
        super(PickErrorTestCase, self).setUp()

        interpreter_names = (
            '_interpret_history_log',
            '_interpret_step_logs',
            '_interpret_task_logs',
        )
        for interpreter_name in interpreter_names:
            setattr(self.runner, interpreter_name, Mock())

        pick_error_patcher = patch('mrjob.logs.mixin._pick_error')
        self._pick_error = self.start(pick_error_patcher)
Example #7
0
 def test_try_till_success(self):
     # f() raises IOError on its first two calls and returns None on the
     # third; with max_tries=0 the wrapper is expected to reach that call
     wrapped = Mock()
     wrapped.f = Mock(__name__='f', side_effect=[IOError, IOError, None])

     retry_options = dict(
         retry_if=lambda x: True,
         backoff=0.0001,
         max_tries=0,
     )
     wrapper = RetryWrapper(wrapped, **retry_options)

     wrapper.f()

     self.assertEqual(wrapped.f.call_count, 3)
Example #8
0
 def test_failure_raises_if_all_tries_fail(self):
     # both permitted tries raise IOError, so the error should reach us
     wrapped = Mock()
     wrapped.f = Mock(__name__='f', side_effect=[IOError, IOError])

     retry_options = dict(
         retry_if=lambda x: True,
         backoff=0.0001,
         max_tries=2,
     )
     wrapper = RetryWrapper(wrapped, **retry_options)

     with self.assertRaises(IOError):
         wrapper.f()

     self.assertEqual(wrapped.f.call_count, 2)
Example #9
0
    def test_failure(self):
        # the first call to f() raises IOError, the second returns 1
        wrapped = Mock()
        wrapped.f = Mock(__name__='f', side_effect=[IOError, 1])

        retry_options = dict(
            retry_if=lambda x: True,
            backoff=0.0001,
            max_tries=2,
        )
        wrapper = RetryWrapper(wrapped, **retry_options)

        result = wrapper.f()

        self.assertEqual(result, 1)
        self.assertEqual(wrapped.f.call_count, 2)
Example #10
0
    def test_success(self):
        # f() never raises, so it should be called exactly once, with no
        # arguments
        wrapped = Mock()
        wrapped.f = Mock(__name__='f', side_effect=None)

        retry_options = dict(
            retry_if=lambda x: True,
            backoff=0.0001,
            max_tries=2,
        )
        wrapper = RetryWrapper(wrapped, **retry_options)

        wrapper.f()

        wrapped.f.assert_called_once_with()
Example #11
0
    def setUp(self):
        """Create a mock filesystem and patch the ``mrjob.logs.wrap`` logger.

        The mock filesystem's ``cat()`` returns an empty tuple and its
        ``exists()`` returns True. Whatever ``self.start()`` returns for the
        ``mrjob.logs.wrap.log`` patch is kept on ``self.mock_log``.
        """
        # this used to build the super() proxy without calling setUp() on
        # it, which silently skipped the parent class's setup
        super(CatLogsTestCase, self).setUp()

        self.mock_data = None

        self.mock_fs = Mock()
        self.mock_fs.cat = Mock(return_value=())
        self.mock_fs.exists = Mock(return_value=True)

        self.mock_log = self.start(patch('mrjob.logs.wrap.log'))
Example #12
0
    def setUp(self):
        """Create a HadoopJobRunner with mocked log-dir and fs lookups.

        ``mrjob.hadoop.log`` is patched before the runner is built, and the
        patched logger is reset at the end so tests start with no recorded
        log calls.
        """
        super(StreamingLogDirsTestCase, self).setUp()

        log_patcher = patch('mrjob.hadoop.log')
        self.log = self.start(log_patcher)

        self.runner = HadoopJobRunner()
        # no log dirs are reported; every path is said to exist
        self.runner._hadoop_log_dirs = Mock(return_value=[])
        self.runner.fs.exists = Mock(return_value=True)

        # discard anything logged while HadoopJobRunner was being set up
        self.log.reset_mock()
Example #13
0
    def test_wrapping(self):
        # the first alternative's f() always raises IOError; the second
        # one's returns 2
        def is_io_error(ex):
            return isinstance(ex, IOError)

        failing = Mock()
        failing.f = Mock(__name__='f', side_effect=IOError)
        working = Mock()
        working.f = Mock(__name__='f', return_value=2)

        go_round = RetryGoRound([failing, working], is_io_error)

        self.assertEqual(go_round.f('foo', bar='baz'), 2)

        # both alternatives should have received the same arguments
        for alternative in (failing, working):
            alternative.f.assert_called_once_with('foo', bar='baz')

        # the wrapped method keeps the original's name
        self.assertEqual(go_round.f.__name__, 'f')
Example #14
0
    def test_multiple_files(self):
        # ls() lists three paths, and each of them cats to the same two
        # chunks
        per_file_chunks = [b'chunk1\n', b'chunk2']

        fs = Filesystem()
        fs.ls = Mock(return_value=['path1', 'path2', 'path3'])
        fs._cat_file = Mock(return_value=[b'chunk1\n', b'chunk2'])

        # an empty bytestring is expected between one file's chunks and
        # the next file's
        expected_chunks = (
            per_file_chunks + [b''] +
            per_file_chunks + [b''] +
            per_file_chunks)

        self.assertEqual(list(fs.cat('whatever')), expected_chunks)
Example #15
0
    def test_can_wrap_around(self):
        # first.f() fails and then returns 1; second.f() returns 2 and
        # then fails
        def is_io_error(ex):
            return isinstance(ex, IOError)

        first = Mock()
        first.f = Mock(__name__='f', side_effect=[IOError, 1])
        second = Mock()
        second.f = Mock(__name__='f', side_effect=[2, IOError])

        go_round = RetryGoRound([first, second], is_io_error)

        first_result = go_round.f()
        self.assertEqual(first_result, 2)
        second_result = go_round.f()
        self.assertEqual(second_result, 1)

        # each alternative ended up being called twice
        for alternative in (first, second):
            self.assertEqual(alternative.f.call_count, 2)
Example #16
0
    def test_unrecoverable_error(self):
        # ValueError is not accepted by the retry predicate, so it should
        # be raised straight away, without trying the second alternative
        def is_io_error(ex):
            return isinstance(ex, IOError)

        broken = Mock()
        broken.f = Mock(__name__='f', side_effect=ValueError)
        unused = Mock()
        unused.f = Mock(__name__='f', return_value=2)

        go_round = RetryGoRound([broken, unused], is_io_error)

        for _ in range(2):
            with self.assertRaises(ValueError):
                go_round.f()

        self.assertEqual(broken.f.call_count, 2)
        self.assertEqual(unused.f.call_count, 0)
Example #17
0
    def test_success(self):
        def is_io_error(ex):
            return isinstance(ex, IOError)

        # the method mocks are given a __name__ so that wraps() can copy it
        primary = Mock()
        primary.f = Mock(__name__='f', return_value=1)
        fallback = Mock()
        fallback.f = Mock(__name__='f', return_value=2)

        go_round = RetryGoRound([primary, fallback], is_io_error)

        result = go_round.f()

        self.assertEqual(result, 1)
        self.assertEqual(primary.f.call_count, 1)
        # the first alternative worked, so the fallback was never tried
        self.assertEqual(fallback.f.call_count, 0)
Example #18
0
    def setUp(self):
        """Build a fake filesystem and patch the task-log helpers.

        File contents are never faked. Instead, ``_cat_log_lines()`` is
        patched to hand back the path it was given, and the two
        ``_parse_task_*()`` functions are patched to look that path up in
        ``self.path_to_mock_result``. Tests fill in ``self.mock_paths`` and
        ``self.path_to_mock_result``, and can check
        ``self.mock_paths_catted`` afterwards.
        """
        super(InterpretTaskLogsTestCase, self).setUp()

        self.mock_paths = []
        self.mock_paths_catted = []
        self.path_to_mock_result = {}

        self.mock_log_callback = Mock()

        def fake_cat_log_lines(fs, path):
            # record known paths, then pass the path through untouched
            if path in self.mock_paths:
                self.mock_paths_catted.append(path)
            return path

        # (the real parsers are given lines from the log; these are given
        # the path handed back by fake_cat_log_lines())
        def fake_parse_task_syslog(path_from_mock_cat_log_lines):
            # paths without a canned result parse to {}
            return self.path_to_mock_result.get(
                path_from_mock_cat_log_lines, {})

        def fake_parse_task_stderr(path_from_mock_cat_log_lines):
            # paths without a canned result parse to None
            return self.path_to_mock_result.get(
                path_from_mock_cat_log_lines)

        def fake_exists(path):
            return path == 'MOCK_LOG_DIR' or path in self.mock_paths

        def fake_ls(log_dir):
            # log_dir is ignored; every mock path is always listed
            return self.mock_paths

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(side_effect=fake_exists)
        self.mock_fs.ls = Mock(side_effect=fake_ls)

        cat_patcher = patch('mrjob.logs.task._cat_log_lines',
                            side_effect=fake_cat_log_lines)
        self.mock_cat_log_lines = self.start(cat_patcher)

        syslog_patcher = patch('mrjob.logs.task._parse_task_syslog',
                               side_effect=fake_parse_task_syslog)
        self.start(syslog_patcher)

        stderr_patcher = patch('mrjob.logs.task._parse_task_stderr',
                               side_effect=fake_parse_task_stderr)
        self.start(stderr_patcher)
Example #19
0
    def setUp(self):
        """Create a mock filesystem whose ``ls()`` replays ``self.mock_paths``.

        Entries are yielded in order; an entry that is an exception
        instance is raised instead of yielded.
        """
        super(LsLogsTestCase, self).setUp()

        self.mock_paths = []
        self.mock_fs = Mock()

        def replay_mock_paths(path):
            # for simplicity, the requested path is not looked at
            for entry in self.mock_paths:
                if isinstance(entry, Exception):
                    raise entry
                yield entry

        self.mock_fs.ls = Mock(side_effect=replay_mock_paths)
Example #20
0
    def setUp(self):
        """Create a mock filesystem backed by ``self.mock_paths``.

        ``ls()`` returns the mock paths that start with the requested
        directory plus ``'/'``, ``join()`` joins its arguments with
        ``'/'``, and ``exists()`` always returns True.
        """
        super(LsTaskLogsTestCase, self).setUp()

        self.mock_paths = []

        def list_under(log_dir):
            matches = []
            for candidate in self.mock_paths:
                if candidate.startswith(log_dir + '/'):
                    matches.append(candidate)
            return matches

        def slash_join(path, *paths):
            return '/'.join((path,) + paths)

        self.mock_fs = Mock()
        self.mock_fs.exists = Mock(return_value=True)
        self.mock_fs.join = Mock(side_effect=slash_join)
        self.mock_fs.ls = Mock(side_effect=list_under)
Example #21
0
    def setUp(self):
        """Create three mock filesystems that each accept different paths.

        ``hadoop_fs`` accepts whatever ``is_uri()`` accepts, ``local_fs``
        accepts whatever ``is_uri()`` rejects, and ``s3_fs`` accepts
        whatever ``is_s3_uri()`` accepts. ``mrjob.fs.composite.log`` is
        patched as well.
        """
        super(CompositeFilesystemTestCase, self).setUp()

        self.log = self.start(patch('mrjob.fs.composite.log'))

        def is_not_uri(p):
            return not is_uri(p)

        hadoop_fs = Mock(spec=Filesystem)
        hadoop_fs.get_hadoop_version = Mock()
        hadoop_fs.can_handle_path.side_effect = is_uri
        self.hadoop_fs = hadoop_fs

        local_fs = Mock(spec=Filesystem)
        local_fs.can_handle_path.side_effect = is_not_uri
        self.local_fs = local_fs

        s3_fs = Mock(spec=Filesystem)
        s3_fs.create_bucket = Mock()
        s3_fs.can_handle_path.side_effect = is_s3_uri
        self.s3_fs = s3_fs
Example #22
0
    def test_syslog_with_corresponding_stderr(self):
        # one attempt with both a syslog and a stderr file, each of which
        # has a canned parse result
        stderr_path = '/userlogs/attempt_201512232143_0008_m_000001_3/stderr'
        syslog_path = '/userlogs/attempt_201512232143_0008_m_000001_3/syslog'
        stderr_callback = Mock()

        self.mock_paths = [syslog_path, stderr_path]
        self.path_to_mock_result = {
            syslog_path: {'hadoop_error': {'message': 'BOOM'}},
            stderr_path: {'message': 'because, exploding code'},
        }

        interpretation = self.interpret_task_logs(
            stderr_callback=stderr_callback)

        expected_error = {
            'attempt_id': 'attempt_201512232143_0008_m_000001_3',
            'hadoop_error': {
                'message': 'BOOM',
                'path': syslog_path,
            },
            'task_error': {
                'message': 'because, exploding code',
                'path': stderr_path,
            },
            'task_id': 'task_201512232143_0008_m_000001',
        }
        self.assertEqual(
            interpretation,
            {'errors': [expected_error], 'partial': True})

        # the callback hears about the stderr file exactly once
        stderr_callback.assert_called_once_with(stderr_path)
Example #23
0
    def test_syslog_with_empty_corresponding_stderr(self):
        # the stderr file is listed but has no canned parse result, so
        # only the syslog's error should be reported
        stderr_path = '/userlogs/attempt_201512232143_0008_m_000001_3/stderr'
        syslog_path = '/userlogs/attempt_201512232143_0008_m_000001_3/syslog'
        log_callback = Mock()

        self.mock_paths = [syslog_path, stderr_path]
        self.path_to_mock_result = {
            syslog_path: {'hadoop_error': {'message': 'BOOM'}},
        }

        interpretation = self.interpret_task_logs(log_callback=log_callback)

        expected_error = {
            'attempt_id': 'attempt_201512232143_0008_m_000001_3',
            'hadoop_error': {
                'message': 'BOOM',
                'path': syslog_path,
            },
            'task_id': 'task_201512232143_0008_m_000001',
        }
        self.assertEqual(
            interpretation,
            {'errors': [expected_error], 'partial': True})

        # the callback hears about the stderr file first, then the syslog
        expected_calls = [call(stderr_path), call(syslog_path)]
        self.assertEqual(log_callback.call_args_list, expected_calls)
Example #24
0
        def _mock_runner_class(runner_alias):
            # Stand-in for _runner_class(): builds a mock class that
            # returns self.runner when called, and carries over `alias`
            # and `OPT_NAMES` from the class _runner_class() returns for
            # this alias. The mock is also kept on self.runner_class.
            real_class = _runner_class(runner_alias)

            mock_class = self.runner_class = Mock()
            mock_class.return_value = self.runner
            mock_class.alias = real_class.alias
            mock_class.OPT_NAMES = real_class.OPT_NAMES

            return mock_class
Example #25
0
    def setUp(self):
        """Patch both task-log listers and mock the runner's log-dir stream.

        Whatever ``self.start()`` returns for each patch is kept on
        ``self._ls_task_logs`` and ``self._ls_spark_task_logs``.
        """
        super(LsTaskLogsTestCase, self).setUp()

        task_logs_patcher = patch('mrjob.logs.mixin._ls_task_logs')
        self._ls_task_logs = self.start(task_logs_patcher)

        spark_logs_patcher = patch('mrjob.logs.mixin._ls_spark_task_logs')
        self._ls_spark_task_logs = self.start(spark_logs_patcher)

        self.runner._stream_task_log_dirs = Mock()
Example #26
0
    def test_set_credentials_and_project_id(self):
        # passing both credentials and a project ID should log no warning,
        # and both should reach the client
        project_id = 'alan-parsons'
        creds = Mock()

        fs = GCSFilesystem(credentials=creds, project_id=project_id)

        self.assertFalse(self.log.warning.called)

        actual_client = fs.client
        expected_client = self.Client(project=project_id, credentials=creds)
        self.assertEqual(actual_client, expected_client)
Example #27
0
    def test_one_failure(self):
        def is_io_error(ex):
            return isinstance(ex, IOError)

        failing = Mock()
        failing.x = 100
        failing.f = Mock(__name__='f', side_effect=IOError)

        working = Mock()
        working.x = 200
        working.f = Mock(__name__='f', return_value=2)

        go_round = RetryGoRound([failing, working], is_io_error)

        # attributes come from the first alternative to begin with
        self.assertEqual(go_round.x, 100)
        self.assertEqual(go_round.f(), 2)
        # the second alternative was the last one that worked, so x is
        # now read from it
        self.assertEqual(go_round.x, 200)
        # this call should not touch the failing alternative at all
        self.assertEqual(go_round.f(), 2)

        self.assertEqual(failing.f.call_count, 1)
        self.assertEqual(working.f.call_count, 2)
Example #28
0
 def test_no_output(self):
     # with --no-output, nothing should be written to stdout or stderr
     # even though the runner has a line of output to stream
     launcher = MRJobLauncher(args=['--no-conf', '--no-output', ''])
     launcher.sandbox()

     with patch.object(launcher, 'make_runner') as m_make_runner:
         mock_runner = Mock()
         _mock_context_mgr(m_make_runner, mock_runner)
         mock_runner.stream_output.return_value = ['a line']

         launcher.run_job()

         for stream in (launcher.stdout, launcher.stderr):
             self.assertEqual(stream.getvalue(), b'')
Example #29
0
    def setUp(self):
        """Create a mock filesystem and patch the bootstrap-log helpers.

        The patched ``_parse_task_stderr()`` always returns a dict whose
        message is ``'BOOM!\\n'``. ``_cat_log()`` is patched with a default
        mock.
        """
        super(InterpretEMRBootstrapStderrTestCase, self).setUp()

        self.mock_fs = Mock()

        canned_error = dict(message='BOOM!\n')
        parse_patcher = patch('mrjob.logs.bootstrap._parse_task_stderr',
                              return_value=canned_error)
        self.mock_parse_task_stderr = self.start(parse_patcher)

        cat_patcher = patch('mrjob.logs.bootstrap._cat_log')
        self.mock_cat_log = self.start(cat_patcher)
Example #30
0
    def test_all_fail(self):
        def is_io_error(ex):
            return isinstance(ex, IOError)

        first = Mock()
        first.x = 100
        first.f = Mock(__name__='f', side_effect=IOError)

        second = Mock()
        second.x = 200
        second.f = Mock(__name__='f', side_effect=IOError)

        go_round = RetryGoRound([first, second], is_io_error)

        self.assertEqual(go_round.x, 100)
        # every alternative fails, so the IOError reaches the caller
        with self.assertRaises(IOError):
            go_round.f()
        # since nothing worked, x is still read from the first alternative
        self.assertEqual(go_round.x, 100)
        # a second attempt fails in just the same way
        with self.assertRaises(IOError):
            go_round.f()

        # both alternatives were tried on each of the two attempts
        for alternative in (first, second):
            self.assertEqual(alternative.f.call_count, 2)